Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ fd728f2f

History | View | Annotate | Download (253.8 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10 158142c2 bellard
Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 2ac8bd03 Peter Maydell
/* softfloat (and in particular the code in softfloat-specialize.h) is
39 2ac8bd03 Peter Maydell
 * target-dependent and needs the TARGET_* macros.
40 2ac8bd03 Peter Maydell
 */
41 2ac8bd03 Peter Maydell
#include "config.h"
42 2ac8bd03 Peter Maydell
43 6b4c305c Paolo Bonzini
#include "fpu/softfloat.h"
44 158142c2 bellard
45 158142c2 bellard
/*----------------------------------------------------------------------------
46 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
47 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
48 158142c2 bellard
| desired.)
49 158142c2 bellard
*----------------------------------------------------------------------------*/
50 158142c2 bellard
#include "softfloat-macros.h"
51 158142c2 bellard
52 158142c2 bellard
/*----------------------------------------------------------------------------
53 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
54 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
55 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
56 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
57 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
58 158142c2 bellard
| specific.
59 158142c2 bellard
*----------------------------------------------------------------------------*/
60 158142c2 bellard
#include "softfloat-specialize.h"
61 158142c2 bellard
62 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `float_rounding_mode' field of the floating-point
| status.
*----------------------------------------------------------------------------*/

void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
66 158142c2 bellard
67 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Overwrites the `float_exception_flags' field of the floating-point status
| with `val'.  The previous flag value is discarded, not merged.
*----------------------------------------------------------------------------*/

void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
71 1d6bda35 bellard
72 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `floatx80_rounding_precision' field of the
| floating-point status.
*----------------------------------------------------------------------------*/

void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
76 158142c2 bellard
77 158142c2 bellard
/*----------------------------------------------------------------------------
78 bb4d4bb3 Peter Maydell
| Returns the fraction bits of the half-precision floating-point value `a'.
79 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
80 bb4d4bb3 Peter Maydell
81 bb4d4bb3 Peter Maydell
INLINE uint32_t extractFloat16Frac(float16 a)
82 bb4d4bb3 Peter Maydell
{
83 bb4d4bb3 Peter Maydell
    return float16_val(a) & 0x3ff;
84 bb4d4bb3 Peter Maydell
}
85 bb4d4bb3 Peter Maydell
86 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
87 bb4d4bb3 Peter Maydell
| Returns the exponent bits of the half-precision floating-point value `a'.
88 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
89 bb4d4bb3 Peter Maydell
90 94a49d86 Andreas Färber
INLINE int_fast16_t extractFloat16Exp(float16 a)
91 bb4d4bb3 Peter Maydell
{
92 bb4d4bb3 Peter Maydell
    return (float16_val(a) >> 10) & 0x1f;
93 bb4d4bb3 Peter Maydell
}
94 bb4d4bb3 Peter Maydell
95 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
96 bb4d4bb3 Peter Maydell
| Returns the sign bit of the single-precision floating-point value `a'.
97 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
98 bb4d4bb3 Peter Maydell
99 bb4d4bb3 Peter Maydell
INLINE flag extractFloat16Sign(float16 a)
100 bb4d4bb3 Peter Maydell
{
101 bb4d4bb3 Peter Maydell
    return float16_val(a)>>15;
102 bb4d4bb3 Peter Maydell
}
103 bb4d4bb3 Peter Maydell
104 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
105 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
106 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
107 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
108 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
109 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
110 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
111 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
112 158142c2 bellard
| positive or negative integer is returned.
113 158142c2 bellard
*----------------------------------------------------------------------------*/
114 158142c2 bellard
115 bb98fe42 Andreas Färber
static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
116 158142c2 bellard
{
117 158142c2 bellard
    int8 roundingMode;
118 158142c2 bellard
    flag roundNearestEven;
119 158142c2 bellard
    int8 roundIncrement, roundBits;
120 760e1416 Peter Maydell
    int32_t z;
121 158142c2 bellard
122 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
123 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
124 158142c2 bellard
    roundIncrement = 0x40;
125 158142c2 bellard
    if ( ! roundNearestEven ) {
126 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
127 158142c2 bellard
            roundIncrement = 0;
128 158142c2 bellard
        }
129 158142c2 bellard
        else {
130 158142c2 bellard
            roundIncrement = 0x7F;
131 158142c2 bellard
            if ( zSign ) {
132 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
133 158142c2 bellard
            }
134 158142c2 bellard
            else {
135 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
136 158142c2 bellard
            }
137 158142c2 bellard
        }
138 158142c2 bellard
    }
139 158142c2 bellard
    roundBits = absZ & 0x7F;
140 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
141 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
142 158142c2 bellard
    z = absZ;
143 158142c2 bellard
    if ( zSign ) z = - z;
144 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
145 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
146 bb98fe42 Andreas Färber
        return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
147 158142c2 bellard
    }
148 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
149 158142c2 bellard
    return z;
150 158142c2 bellard
151 158142c2 bellard
}
152 158142c2 bellard
153 158142c2 bellard
/*----------------------------------------------------------------------------
154 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
155 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
156 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
157 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
158 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
159 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
160 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
161 158142c2 bellard
| exception is raised and the largest positive or negative integer is
162 158142c2 bellard
| returned.
163 158142c2 bellard
*----------------------------------------------------------------------------*/
164 158142c2 bellard
165 bb98fe42 Andreas Färber
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
166 158142c2 bellard
{
167 158142c2 bellard
    int8 roundingMode;
168 158142c2 bellard
    flag roundNearestEven, increment;
169 760e1416 Peter Maydell
    int64_t z;
170 158142c2 bellard
171 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
172 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
173 bb98fe42 Andreas Färber
    increment = ( (int64_t) absZ1 < 0 );
174 158142c2 bellard
    if ( ! roundNearestEven ) {
175 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
176 158142c2 bellard
            increment = 0;
177 158142c2 bellard
        }
178 158142c2 bellard
        else {
179 158142c2 bellard
            if ( zSign ) {
180 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
181 158142c2 bellard
            }
182 158142c2 bellard
            else {
183 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
184 158142c2 bellard
            }
185 158142c2 bellard
        }
186 158142c2 bellard
    }
187 158142c2 bellard
    if ( increment ) {
188 158142c2 bellard
        ++absZ0;
189 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
190 bb98fe42 Andreas Färber
        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
191 158142c2 bellard
    }
192 158142c2 bellard
    z = absZ0;
193 158142c2 bellard
    if ( zSign ) z = - z;
194 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
195 158142c2 bellard
 overflow:
196 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
197 158142c2 bellard
        return
198 bb98fe42 Andreas Färber
              zSign ? (int64_t) LIT64( 0x8000000000000000 )
199 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
200 158142c2 bellard
    }
201 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
202 158142c2 bellard
    return z;
203 158142c2 bellard
204 158142c2 bellard
}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 fb3ea83a Tom Musta
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
208 fb3ea83a Tom Musta
| `absZ1', with binary point between bits 63 and 64 (between the input words),
209 fb3ea83a Tom Musta
| and returns the properly rounded 64-bit unsigned integer corresponding to the
210 fb3ea83a Tom Musta
| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
211 fb3ea83a Tom Musta
| with the inexact exception raised if the input cannot be represented exactly
212 fb3ea83a Tom Musta
| as an integer.  However, if the fixed-point input is too large, the invalid
213 fb3ea83a Tom Musta
| exception is raised and the largest unsigned integer is returned.
214 fb3ea83a Tom Musta
*----------------------------------------------------------------------------*/
215 fb3ea83a Tom Musta
216 fb3ea83a Tom Musta
static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
217 fb3ea83a Tom Musta
                                uint64_t absZ1 STATUS_PARAM)
218 fb3ea83a Tom Musta
{
219 fb3ea83a Tom Musta
    int8 roundingMode;
220 fb3ea83a Tom Musta
    flag roundNearestEven, increment;
221 fb3ea83a Tom Musta
222 fb3ea83a Tom Musta
    roundingMode = STATUS(float_rounding_mode);
223 fb3ea83a Tom Musta
    roundNearestEven = (roundingMode == float_round_nearest_even);
224 fb3ea83a Tom Musta
    increment = ((int64_t)absZ1 < 0);
225 fb3ea83a Tom Musta
    if (!roundNearestEven) {
226 fb3ea83a Tom Musta
        if (roundingMode == float_round_to_zero) {
227 fb3ea83a Tom Musta
            increment = 0;
228 fb3ea83a Tom Musta
        } else if (absZ1) {
229 fb3ea83a Tom Musta
            if (zSign) {
230 fb3ea83a Tom Musta
                increment = (roundingMode == float_round_down) && absZ1;
231 fb3ea83a Tom Musta
            } else {
232 fb3ea83a Tom Musta
                increment = (roundingMode == float_round_up) && absZ1;
233 fb3ea83a Tom Musta
            }
234 fb3ea83a Tom Musta
        }
235 fb3ea83a Tom Musta
    }
236 fb3ea83a Tom Musta
    if (increment) {
237 fb3ea83a Tom Musta
        ++absZ0;
238 fb3ea83a Tom Musta
        if (absZ0 == 0) {
239 fb3ea83a Tom Musta
            float_raise(float_flag_invalid STATUS_VAR);
240 fb3ea83a Tom Musta
            return LIT64(0xFFFFFFFFFFFFFFFF);
241 fb3ea83a Tom Musta
        }
242 fb3ea83a Tom Musta
        absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
243 fb3ea83a Tom Musta
    }
244 fb3ea83a Tom Musta
245 fb3ea83a Tom Musta
    if (zSign && absZ0) {
246 fb3ea83a Tom Musta
        float_raise(float_flag_invalid STATUS_VAR);
247 fb3ea83a Tom Musta
        return 0;
248 fb3ea83a Tom Musta
    }
249 fb3ea83a Tom Musta
250 fb3ea83a Tom Musta
    if (absZ1) {
251 fb3ea83a Tom Musta
        STATUS(float_exception_flags) |= float_flag_inexact;
252 fb3ea83a Tom Musta
    }
253 fb3ea83a Tom Musta
    return absZ0;
254 fb3ea83a Tom Musta
}
255 fb3ea83a Tom Musta
256 fb3ea83a Tom Musta
/*----------------------------------------------------------------------------
257 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
258 158142c2 bellard
*----------------------------------------------------------------------------*/
259 158142c2 bellard
260 bb98fe42 Andreas Färber
INLINE uint32_t extractFloat32Frac( float32 a )
261 158142c2 bellard
{
262 158142c2 bellard
263 f090c9d4 pbrook
    return float32_val(a) & 0x007FFFFF;
264 158142c2 bellard
265 158142c2 bellard
}
266 158142c2 bellard
267 158142c2 bellard
/*----------------------------------------------------------------------------
268 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
269 158142c2 bellard
*----------------------------------------------------------------------------*/
270 158142c2 bellard
271 94a49d86 Andreas Färber
INLINE int_fast16_t extractFloat32Exp(float32 a)
272 158142c2 bellard
{
273 158142c2 bellard
274 f090c9d4 pbrook
    return ( float32_val(a)>>23 ) & 0xFF;
275 158142c2 bellard
276 158142c2 bellard
}
277 158142c2 bellard
278 158142c2 bellard
/*----------------------------------------------------------------------------
279 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
280 158142c2 bellard
*----------------------------------------------------------------------------*/
281 158142c2 bellard
282 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
283 158142c2 bellard
{
284 158142c2 bellard
285 f090c9d4 pbrook
    return float32_val(a)>>31;
286 158142c2 bellard
287 158142c2 bellard
}
288 158142c2 bellard
289 158142c2 bellard
/*----------------------------------------------------------------------------
290 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
291 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
292 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
293 37d18660 Peter Maydell
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
294 37d18660 Peter Maydell
{
295 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
296 37d18660 Peter Maydell
        if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
297 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
298 37d18660 Peter Maydell
            return make_float32(float32_val(a) & 0x80000000);
299 37d18660 Peter Maydell
        }
300 37d18660 Peter Maydell
    }
301 37d18660 Peter Maydell
    return a;
302 37d18660 Peter Maydell
}
303 37d18660 Peter Maydell
304 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
305 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
306 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
307 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
308 158142c2 bellard
| `zSigPtr', respectively.
309 158142c2 bellard
*----------------------------------------------------------------------------*/
310 158142c2 bellard
311 158142c2 bellard
static void
312 94a49d86 Andreas Färber
 normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, uint32_t *zSigPtr)
313 158142c2 bellard
{
314 158142c2 bellard
    int8 shiftCount;
315 158142c2 bellard
316 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
317 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
318 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
319 158142c2 bellard
320 158142c2 bellard
}
321 158142c2 bellard
322 158142c2 bellard
/*----------------------------------------------------------------------------
323 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
324 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
325 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
326 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
327 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
328 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
329 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
330 158142c2 bellard
| significand.
331 158142c2 bellard
*----------------------------------------------------------------------------*/
332 158142c2 bellard
333 94a49d86 Andreas Färber
INLINE float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
334 158142c2 bellard
{
335 158142c2 bellard
336 f090c9d4 pbrook
    return make_float32(
337 bb98fe42 Andreas Färber
          ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
338 158142c2 bellard
339 158142c2 bellard
}
340 158142c2 bellard
341 158142c2 bellard
/*----------------------------------------------------------------------------
342 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
343 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
344 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
345 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
346 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
347 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
348 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
349 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
350 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
351 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
352 158142c2 bellard
| precision floating-point number.
353 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
354 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
355 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
356 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
357 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
358 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
359 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
360 158142c2 bellard
| Binary Floating-Point Arithmetic.
361 158142c2 bellard
*----------------------------------------------------------------------------*/
362 158142c2 bellard
363 94a49d86 Andreas Färber
static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
364 158142c2 bellard
{
365 158142c2 bellard
    int8 roundingMode;
366 158142c2 bellard
    flag roundNearestEven;
367 158142c2 bellard
    int8 roundIncrement, roundBits;
368 158142c2 bellard
    flag isTiny;
369 158142c2 bellard
370 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
371 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
372 158142c2 bellard
    roundIncrement = 0x40;
373 158142c2 bellard
    if ( ! roundNearestEven ) {
374 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
375 158142c2 bellard
            roundIncrement = 0;
376 158142c2 bellard
        }
377 158142c2 bellard
        else {
378 158142c2 bellard
            roundIncrement = 0x7F;
379 158142c2 bellard
            if ( zSign ) {
380 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
381 158142c2 bellard
            }
382 158142c2 bellard
            else {
383 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
384 158142c2 bellard
            }
385 158142c2 bellard
        }
386 158142c2 bellard
    }
387 158142c2 bellard
    roundBits = zSig & 0x7F;
388 bb98fe42 Andreas Färber
    if ( 0xFD <= (uint16_t) zExp ) {
389 158142c2 bellard
        if (    ( 0xFD < zExp )
390 158142c2 bellard
             || (    ( zExp == 0xFD )
391 bb98fe42 Andreas Färber
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
392 158142c2 bellard
           ) {
393 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
394 f090c9d4 pbrook
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
395 158142c2 bellard
        }
396 158142c2 bellard
        if ( zExp < 0 ) {
397 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
398 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
399 e6afc87f Peter Maydell
                return packFloat32(zSign, 0, 0);
400 e6afc87f Peter Maydell
            }
401 158142c2 bellard
            isTiny =
402 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
403 158142c2 bellard
                || ( zExp < -1 )
404 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
405 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
406 158142c2 bellard
            zExp = 0;
407 158142c2 bellard
            roundBits = zSig & 0x7F;
408 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
409 158142c2 bellard
        }
410 158142c2 bellard
    }
411 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
412 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
413 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
414 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
415 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
416 158142c2 bellard
417 158142c2 bellard
}
418 158142c2 bellard
419 158142c2 bellard
/*----------------------------------------------------------------------------
420 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
421 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
422 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
423 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
424 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
425 158142c2 bellard
| floating-point exponent.
426 158142c2 bellard
*----------------------------------------------------------------------------*/
427 158142c2 bellard
428 158142c2 bellard
static float32
429 94a49d86 Andreas Färber
 normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
430 158142c2 bellard
{
431 158142c2 bellard
    int8 shiftCount;
432 158142c2 bellard
433 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
434 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
435 158142c2 bellard
436 158142c2 bellard
}
437 158142c2 bellard
438 158142c2 bellard
/*----------------------------------------------------------------------------
439 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
440 158142c2 bellard
*----------------------------------------------------------------------------*/
441 158142c2 bellard
442 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat64Frac( float64 a )
443 158142c2 bellard
{
444 158142c2 bellard
445 f090c9d4 pbrook
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
446 158142c2 bellard
447 158142c2 bellard
}
448 158142c2 bellard
449 158142c2 bellard
/*----------------------------------------------------------------------------
450 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
451 158142c2 bellard
*----------------------------------------------------------------------------*/
452 158142c2 bellard
453 94a49d86 Andreas Färber
INLINE int_fast16_t extractFloat64Exp(float64 a)
454 158142c2 bellard
{
455 158142c2 bellard
456 f090c9d4 pbrook
    return ( float64_val(a)>>52 ) & 0x7FF;
457 158142c2 bellard
458 158142c2 bellard
}
459 158142c2 bellard
460 158142c2 bellard
/*----------------------------------------------------------------------------
461 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
462 158142c2 bellard
*----------------------------------------------------------------------------*/
463 158142c2 bellard
464 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
465 158142c2 bellard
{
466 158142c2 bellard
467 f090c9d4 pbrook
    return float64_val(a)>>63;
468 158142c2 bellard
469 158142c2 bellard
}
470 158142c2 bellard
471 158142c2 bellard
/*----------------------------------------------------------------------------
472 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
473 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
474 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
475 37d18660 Peter Maydell
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
476 37d18660 Peter Maydell
{
477 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
478 37d18660 Peter Maydell
        if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
479 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
480 37d18660 Peter Maydell
            return make_float64(float64_val(a) & (1ULL << 63));
481 37d18660 Peter Maydell
        }
482 37d18660 Peter Maydell
    }
483 37d18660 Peter Maydell
    return a;
484 37d18660 Peter Maydell
}
485 37d18660 Peter Maydell
486 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
487 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
488 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
489 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
490 158142c2 bellard
| `zSigPtr', respectively.
491 158142c2 bellard
*----------------------------------------------------------------------------*/
492 158142c2 bellard
493 158142c2 bellard
static void
494 94a49d86 Andreas Färber
 normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t *zExpPtr, uint64_t *zSigPtr)
495 158142c2 bellard
{
496 158142c2 bellard
    int8 shiftCount;
497 158142c2 bellard
498 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
499 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
500 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
501 158142c2 bellard
502 158142c2 bellard
}
503 158142c2 bellard
504 158142c2 bellard
/*----------------------------------------------------------------------------
505 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
506 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
507 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
508 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
509 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
510 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
511 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
512 158142c2 bellard
| significand.
513 158142c2 bellard
*----------------------------------------------------------------------------*/
514 158142c2 bellard
515 94a49d86 Andreas Färber
INLINE float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
516 158142c2 bellard
{
517 158142c2 bellard
518 f090c9d4 pbrook
    return make_float64(
519 bb98fe42 Andreas Färber
        ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
520 158142c2 bellard
521 158142c2 bellard
}
522 158142c2 bellard
523 158142c2 bellard
/*----------------------------------------------------------------------------
524 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
525 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
526 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
527 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
528 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
529 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
530 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
531 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
532 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
533 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
534 158142c2 bellard
| precision floating-point number.
535 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
536 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
537 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
538 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
539 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
540 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
541 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
542 158142c2 bellard
| Binary Floating-Point Arithmetic.
543 158142c2 bellard
*----------------------------------------------------------------------------*/
544 158142c2 bellard
545 94a49d86 Andreas Färber
static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
546 158142c2 bellard
{
547 158142c2 bellard
    int8 roundingMode;
548 158142c2 bellard
    flag roundNearestEven;
549 94a49d86 Andreas Färber
    int_fast16_t roundIncrement, roundBits;
550 158142c2 bellard
    flag isTiny;
551 158142c2 bellard
552 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
553 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
554 158142c2 bellard
    roundIncrement = 0x200;
555 158142c2 bellard
    if ( ! roundNearestEven ) {
556 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
557 158142c2 bellard
            roundIncrement = 0;
558 158142c2 bellard
        }
559 158142c2 bellard
        else {
560 158142c2 bellard
            roundIncrement = 0x3FF;
561 158142c2 bellard
            if ( zSign ) {
562 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
563 158142c2 bellard
            }
564 158142c2 bellard
            else {
565 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
566 158142c2 bellard
            }
567 158142c2 bellard
        }
568 158142c2 bellard
    }
569 158142c2 bellard
    roundBits = zSig & 0x3FF;
570 bb98fe42 Andreas Färber
    if ( 0x7FD <= (uint16_t) zExp ) {
571 158142c2 bellard
        if (    ( 0x7FD < zExp )
572 158142c2 bellard
             || (    ( zExp == 0x7FD )
573 bb98fe42 Andreas Färber
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
574 158142c2 bellard
           ) {
575 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
576 f090c9d4 pbrook
            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
577 158142c2 bellard
        }
578 158142c2 bellard
        if ( zExp < 0 ) {
579 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
580 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
581 e6afc87f Peter Maydell
                return packFloat64(zSign, 0, 0);
582 e6afc87f Peter Maydell
            }
583 158142c2 bellard
            isTiny =
584 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
585 158142c2 bellard
                || ( zExp < -1 )
586 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
587 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
588 158142c2 bellard
            zExp = 0;
589 158142c2 bellard
            roundBits = zSig & 0x3FF;
590 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
591 158142c2 bellard
        }
592 158142c2 bellard
    }
593 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
594 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
595 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
596 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
597 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
598 158142c2 bellard
599 158142c2 bellard
}
600 158142c2 bellard
601 158142c2 bellard
/*----------------------------------------------------------------------------
602 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
603 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
604 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
605 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
606 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
607 158142c2 bellard
| floating-point exponent.
608 158142c2 bellard
*----------------------------------------------------------------------------*/
609 158142c2 bellard
610 158142c2 bellard
static float64
611 94a49d86 Andreas Färber
 normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
612 158142c2 bellard
{
613 158142c2 bellard
    int8 shiftCount;
614 158142c2 bellard
615 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
616 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
617 158142c2 bellard
618 158142c2 bellard
}
619 158142c2 bellard
620 158142c2 bellard
/*----------------------------------------------------------------------------
621 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
622 158142c2 bellard
| value `a'.
623 158142c2 bellard
*----------------------------------------------------------------------------*/
624 158142c2 bellard
625 bb98fe42 Andreas Färber
INLINE uint64_t extractFloatx80Frac( floatx80 a )
626 158142c2 bellard
{
627 158142c2 bellard
628 158142c2 bellard
    return a.low;
629 158142c2 bellard
630 158142c2 bellard
}
631 158142c2 bellard
632 158142c2 bellard
/*----------------------------------------------------------------------------
633 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
634 158142c2 bellard
| value `a'.
635 158142c2 bellard
*----------------------------------------------------------------------------*/
636 158142c2 bellard
637 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
638 158142c2 bellard
{
639 158142c2 bellard
640 158142c2 bellard
    return a.high & 0x7FFF;
641 158142c2 bellard
642 158142c2 bellard
}
643 158142c2 bellard
644 158142c2 bellard
/*----------------------------------------------------------------------------
645 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
646 158142c2 bellard
| `a'.
647 158142c2 bellard
*----------------------------------------------------------------------------*/
648 158142c2 bellard
649 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
650 158142c2 bellard
{
651 158142c2 bellard
652 158142c2 bellard
    return a.high>>15;
653 158142c2 bellard
654 158142c2 bellard
}
655 158142c2 bellard
656 158142c2 bellard
/*----------------------------------------------------------------------------
657 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
658 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
659 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
660 158142c2 bellard
| `zSigPtr', respectively.
661 158142c2 bellard
*----------------------------------------------------------------------------*/
662 158142c2 bellard
663 158142c2 bellard
static void
664 bb98fe42 Andreas Färber
 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
665 158142c2 bellard
{
666 158142c2 bellard
    int8 shiftCount;
667 158142c2 bellard
668 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
669 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
670 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
671 158142c2 bellard
672 158142c2 bellard
}
673 158142c2 bellard
674 158142c2 bellard
/*----------------------------------------------------------------------------
675 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
676 158142c2 bellard
| extended double-precision floating-point value, returning the result.
677 158142c2 bellard
*----------------------------------------------------------------------------*/
678 158142c2 bellard
679 bb98fe42 Andreas Färber
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
680 158142c2 bellard
{
681 158142c2 bellard
    floatx80 z;
682 158142c2 bellard
683 158142c2 bellard
    z.low = zSig;
684 bb98fe42 Andreas Färber
    z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
685 158142c2 bellard
    return z;
686 158142c2 bellard
687 158142c2 bellard
}
688 158142c2 bellard
689 158142c2 bellard
/*----------------------------------------------------------------------------
690 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
691 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
692 158142c2 bellard
| and returns the proper extended double-precision floating-point value
693 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
694 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
695 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
696 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
697 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
698 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
699 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
700 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
701 158142c2 bellard
| double-precision floating-point number.
702 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
703 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
704 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
705 158142c2 bellard
| format.
706 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
707 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
708 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
709 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
710 158142c2 bellard
| Floating-Point Arithmetic.
711 158142c2 bellard
*----------------------------------------------------------------------------*/
712 158142c2 bellard
713 158142c2 bellard
static floatx80
714 158142c2 bellard
 roundAndPackFloatx80(
715 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
716 158142c2 bellard
 STATUS_PARAM)
717 158142c2 bellard
{
718 158142c2 bellard
    int8 roundingMode;
719 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
720 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
721 158142c2 bellard
722 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
723 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
724 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
725 158142c2 bellard
    if ( roundingPrecision == 64 ) {
726 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
727 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
728 158142c2 bellard
    }
729 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
730 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
731 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
732 158142c2 bellard
    }
733 158142c2 bellard
    else {
734 158142c2 bellard
        goto precision80;
735 158142c2 bellard
    }
736 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
737 158142c2 bellard
    if ( ! roundNearestEven ) {
738 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
739 158142c2 bellard
            roundIncrement = 0;
740 158142c2 bellard
        }
741 158142c2 bellard
        else {
742 158142c2 bellard
            roundIncrement = roundMask;
743 158142c2 bellard
            if ( zSign ) {
744 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
745 158142c2 bellard
            }
746 158142c2 bellard
            else {
747 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
748 158142c2 bellard
            }
749 158142c2 bellard
        }
750 158142c2 bellard
    }
751 158142c2 bellard
    roundBits = zSig0 & roundMask;
752 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
753 158142c2 bellard
        if (    ( 0x7FFE < zExp )
754 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
755 158142c2 bellard
           ) {
756 158142c2 bellard
            goto overflow;
757 158142c2 bellard
        }
758 158142c2 bellard
        if ( zExp <= 0 ) {
759 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
760 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
761 e6afc87f Peter Maydell
                return packFloatx80(zSign, 0, 0);
762 e6afc87f Peter Maydell
            }
763 158142c2 bellard
            isTiny =
764 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
765 158142c2 bellard
                || ( zExp < 0 )
766 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
767 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
768 158142c2 bellard
            zExp = 0;
769 158142c2 bellard
            roundBits = zSig0 & roundMask;
770 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
771 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
772 158142c2 bellard
            zSig0 += roundIncrement;
773 bb98fe42 Andreas Färber
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
774 158142c2 bellard
            roundIncrement = roundMask + 1;
775 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
776 158142c2 bellard
                roundMask |= roundIncrement;
777 158142c2 bellard
            }
778 158142c2 bellard
            zSig0 &= ~ roundMask;
779 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
780 158142c2 bellard
        }
781 158142c2 bellard
    }
782 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
783 158142c2 bellard
    zSig0 += roundIncrement;
784 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
785 158142c2 bellard
        ++zExp;
786 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
787 158142c2 bellard
    }
788 158142c2 bellard
    roundIncrement = roundMask + 1;
789 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
790 158142c2 bellard
        roundMask |= roundIncrement;
791 158142c2 bellard
    }
792 158142c2 bellard
    zSig0 &= ~ roundMask;
793 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
794 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
795 158142c2 bellard
 precision80:
796 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig1 < 0 );
797 158142c2 bellard
    if ( ! roundNearestEven ) {
798 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
799 158142c2 bellard
            increment = 0;
800 158142c2 bellard
        }
801 158142c2 bellard
        else {
802 158142c2 bellard
            if ( zSign ) {
803 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
804 158142c2 bellard
            }
805 158142c2 bellard
            else {
806 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
807 158142c2 bellard
            }
808 158142c2 bellard
        }
809 158142c2 bellard
    }
810 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
811 158142c2 bellard
        if (    ( 0x7FFE < zExp )
812 158142c2 bellard
             || (    ( zExp == 0x7FFE )
813 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
814 158142c2 bellard
                  && increment
815 158142c2 bellard
                )
816 158142c2 bellard
           ) {
817 158142c2 bellard
            roundMask = 0;
818 158142c2 bellard
 overflow:
819 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
820 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
821 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
822 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
823 158142c2 bellard
               ) {
824 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
825 158142c2 bellard
            }
826 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
827 158142c2 bellard
        }
828 158142c2 bellard
        if ( zExp <= 0 ) {
829 158142c2 bellard
            isTiny =
830 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
831 158142c2 bellard
                || ( zExp < 0 )
832 158142c2 bellard
                || ! increment
833 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
834 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
835 158142c2 bellard
            zExp = 0;
836 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
837 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
838 158142c2 bellard
            if ( roundNearestEven ) {
839 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig1 < 0 );
840 158142c2 bellard
            }
841 158142c2 bellard
            else {
842 158142c2 bellard
                if ( zSign ) {
843 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
844 158142c2 bellard
                }
845 158142c2 bellard
                else {
846 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
847 158142c2 bellard
                }
848 158142c2 bellard
            }
849 158142c2 bellard
            if ( increment ) {
850 158142c2 bellard
                ++zSig0;
851 158142c2 bellard
                zSig0 &=
852 bb98fe42 Andreas Färber
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
853 bb98fe42 Andreas Färber
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
854 158142c2 bellard
            }
855 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
856 158142c2 bellard
        }
857 158142c2 bellard
    }
858 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
859 158142c2 bellard
    if ( increment ) {
860 158142c2 bellard
        ++zSig0;
861 158142c2 bellard
        if ( zSig0 == 0 ) {
862 158142c2 bellard
            ++zExp;
863 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
864 158142c2 bellard
        }
865 158142c2 bellard
        else {
866 bb98fe42 Andreas Färber
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
867 158142c2 bellard
        }
868 158142c2 bellard
    }
869 158142c2 bellard
    else {
870 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
871 158142c2 bellard
    }
872 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
873 158142c2 bellard
874 158142c2 bellard
}
875 158142c2 bellard
876 158142c2 bellard
/*----------------------------------------------------------------------------
877 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
878 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
879 158142c2 bellard
| and returns the proper extended double-precision floating-point value
880 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
881 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
882 158142c2 bellard
| normalized.
883 158142c2 bellard
*----------------------------------------------------------------------------*/
884 158142c2 bellard
885 158142c2 bellard
static floatx80
886 158142c2 bellard
 normalizeRoundAndPackFloatx80(
887 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
888 158142c2 bellard
 STATUS_PARAM)
889 158142c2 bellard
{
890 158142c2 bellard
    int8 shiftCount;
891 158142c2 bellard
892 158142c2 bellard
    if ( zSig0 == 0 ) {
893 158142c2 bellard
        zSig0 = zSig1;
894 158142c2 bellard
        zSig1 = 0;
895 158142c2 bellard
        zExp -= 64;
896 158142c2 bellard
    }
897 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
898 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
899 158142c2 bellard
    zExp -= shiftCount;
900 158142c2 bellard
    return
901 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
902 158142c2 bellard
903 158142c2 bellard
}
904 158142c2 bellard
905 158142c2 bellard
/*----------------------------------------------------------------------------
906 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
907 158142c2 bellard
| floating-point value `a'.
908 158142c2 bellard
*----------------------------------------------------------------------------*/
909 158142c2 bellard
910 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac1( float128 a )
911 158142c2 bellard
{
912 158142c2 bellard
913 158142c2 bellard
    return a.low;
914 158142c2 bellard
915 158142c2 bellard
}
916 158142c2 bellard
917 158142c2 bellard
/*----------------------------------------------------------------------------
918 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
919 158142c2 bellard
| floating-point value `a'.
920 158142c2 bellard
*----------------------------------------------------------------------------*/
921 158142c2 bellard
922 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac0( float128 a )
923 158142c2 bellard
{
924 158142c2 bellard
925 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
926 158142c2 bellard
927 158142c2 bellard
}
928 158142c2 bellard
929 158142c2 bellard
/*----------------------------------------------------------------------------
930 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
931 158142c2 bellard
| `a'.
932 158142c2 bellard
*----------------------------------------------------------------------------*/
933 158142c2 bellard
934 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
935 158142c2 bellard
{
936 158142c2 bellard
937 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
938 158142c2 bellard
939 158142c2 bellard
}
940 158142c2 bellard
941 158142c2 bellard
/*----------------------------------------------------------------------------
942 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
943 158142c2 bellard
*----------------------------------------------------------------------------*/
944 158142c2 bellard
945 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
946 158142c2 bellard
{
947 158142c2 bellard
948 158142c2 bellard
    return a.high>>63;
949 158142c2 bellard
950 158142c2 bellard
}
951 158142c2 bellard
952 158142c2 bellard
/*----------------------------------------------------------------------------
953 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
954 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
955 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
956 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
957 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
958 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
959 158142c2 bellard
| location pointed to by `zSig1Ptr'.
960 158142c2 bellard
*----------------------------------------------------------------------------*/
961 158142c2 bellard
962 158142c2 bellard
static void
963 158142c2 bellard
 normalizeFloat128Subnormal(
964 bb98fe42 Andreas Färber
     uint64_t aSig0,
965 bb98fe42 Andreas Färber
     uint64_t aSig1,
966 158142c2 bellard
     int32 *zExpPtr,
967 bb98fe42 Andreas Färber
     uint64_t *zSig0Ptr,
968 bb98fe42 Andreas Färber
     uint64_t *zSig1Ptr
969 158142c2 bellard
 )
970 158142c2 bellard
{
971 158142c2 bellard
    int8 shiftCount;
972 158142c2 bellard
973 158142c2 bellard
    if ( aSig0 == 0 ) {
974 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
975 158142c2 bellard
        if ( shiftCount < 0 ) {
976 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
977 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
978 158142c2 bellard
        }
979 158142c2 bellard
        else {
980 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
981 158142c2 bellard
            *zSig1Ptr = 0;
982 158142c2 bellard
        }
983 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
984 158142c2 bellard
    }
985 158142c2 bellard
    else {
986 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
987 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
988 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
989 158142c2 bellard
    }
990 158142c2 bellard
991 158142c2 bellard
}
992 158142c2 bellard
993 158142c2 bellard
/*----------------------------------------------------------------------------
994 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
995 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
996 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
997 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
998 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
999 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
1000 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
1001 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
1002 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1003 158142c2 bellard
| significand.
1004 158142c2 bellard
*----------------------------------------------------------------------------*/
1005 158142c2 bellard
1006 158142c2 bellard
INLINE float128
1007 bb98fe42 Andreas Färber
 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
1008 158142c2 bellard
{
1009 158142c2 bellard
    float128 z;
1010 158142c2 bellard
1011 158142c2 bellard
    z.low = zSig1;
1012 bb98fe42 Andreas Färber
    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
1013 158142c2 bellard
    return z;
1014 158142c2 bellard
1015 158142c2 bellard
}
1016 158142c2 bellard
1017 158142c2 bellard
/*----------------------------------------------------------------------------
1018 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1019 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
1020 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
1021 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
1022 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
1023 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
1024 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
1025 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
1026 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
1027 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
1028 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
1029 158142c2 bellard
| precision floating-point number.
1030 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
1031 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
1032 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
1033 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
1034 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
1035 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1036 158142c2 bellard
*----------------------------------------------------------------------------*/
1037 158142c2 bellard
1038 158142c2 bellard
static float128
1039 158142c2 bellard
 roundAndPackFloat128(
1040 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
1041 158142c2 bellard
{
1042 158142c2 bellard
    int8 roundingMode;
1043 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
1044 158142c2 bellard
1045 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
1046 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
1047 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig2 < 0 );
1048 158142c2 bellard
    if ( ! roundNearestEven ) {
1049 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
1050 158142c2 bellard
            increment = 0;
1051 158142c2 bellard
        }
1052 158142c2 bellard
        else {
1053 158142c2 bellard
            if ( zSign ) {
1054 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
1055 158142c2 bellard
            }
1056 158142c2 bellard
            else {
1057 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
1058 158142c2 bellard
            }
1059 158142c2 bellard
        }
1060 158142c2 bellard
    }
1061 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) zExp ) {
1062 158142c2 bellard
        if (    ( 0x7FFD < zExp )
1063 158142c2 bellard
             || (    ( zExp == 0x7FFD )
1064 158142c2 bellard
                  && eq128(
1065 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
1066 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1067 158142c2 bellard
                         zSig0,
1068 158142c2 bellard
                         zSig1
1069 158142c2 bellard
                     )
1070 158142c2 bellard
                  && increment
1071 158142c2 bellard
                )
1072 158142c2 bellard
           ) {
1073 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1074 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
1075 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
1076 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
1077 158142c2 bellard
               ) {
1078 158142c2 bellard
                return
1079 158142c2 bellard
                    packFloat128(
1080 158142c2 bellard
                        zSign,
1081 158142c2 bellard
                        0x7FFE,
1082 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
1083 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
1084 158142c2 bellard
                    );
1085 158142c2 bellard
            }
1086 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
1087 158142c2 bellard
        }
1088 158142c2 bellard
        if ( zExp < 0 ) {
1089 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1090 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
1091 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
1092 e6afc87f Peter Maydell
            }
1093 158142c2 bellard
            isTiny =
1094 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1095 158142c2 bellard
                || ( zExp < -1 )
1096 158142c2 bellard
                || ! increment
1097 158142c2 bellard
                || lt128(
1098 158142c2 bellard
                       zSig0,
1099 158142c2 bellard
                       zSig1,
1100 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
1101 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
1102 158142c2 bellard
                   );
1103 158142c2 bellard
            shift128ExtraRightJamming(
1104 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1105 158142c2 bellard
            zExp = 0;
1106 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1107 158142c2 bellard
            if ( roundNearestEven ) {
1108 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig2 < 0 );
1109 158142c2 bellard
            }
1110 158142c2 bellard
            else {
1111 158142c2 bellard
                if ( zSign ) {
1112 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
1113 158142c2 bellard
                }
1114 158142c2 bellard
                else {
1115 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
1116 158142c2 bellard
                }
1117 158142c2 bellard
            }
1118 158142c2 bellard
        }
1119 158142c2 bellard
    }
1120 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1121 158142c2 bellard
    if ( increment ) {
1122 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1123 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1124 158142c2 bellard
    }
1125 158142c2 bellard
    else {
1126 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1127 158142c2 bellard
    }
1128 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1129 158142c2 bellard
1130 158142c2 bellard
}
1131 158142c2 bellard
1132 158142c2 bellard
/*----------------------------------------------------------------------------
1133 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1134 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1135 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1136 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1137 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1138 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1139 158142c2 bellard
| point exponent.
1140 158142c2 bellard
*----------------------------------------------------------------------------*/
1141 158142c2 bellard
1142 158142c2 bellard
static float128
1143 158142c2 bellard
 normalizeRoundAndPackFloat128(
1144 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1145 158142c2 bellard
{
1146 158142c2 bellard
    int8 shiftCount;
1147 bb98fe42 Andreas Färber
    uint64_t zSig2;
1148 158142c2 bellard
1149 158142c2 bellard
    if ( zSig0 == 0 ) {
1150 158142c2 bellard
        zSig0 = zSig1;
1151 158142c2 bellard
        zSig1 = 0;
1152 158142c2 bellard
        zExp -= 64;
1153 158142c2 bellard
    }
1154 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1155 158142c2 bellard
    if ( 0 <= shiftCount ) {
1156 158142c2 bellard
        zSig2 = 0;
1157 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1158 158142c2 bellard
    }
1159 158142c2 bellard
    else {
1160 158142c2 bellard
        shift128ExtraRightJamming(
1161 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1162 158142c2 bellard
    }
1163 158142c2 bellard
    zExp -= shiftCount;
1164 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1165 158142c2 bellard
1166 158142c2 bellard
}
1167 158142c2 bellard
1168 158142c2 bellard
/*----------------------------------------------------------------------------
1169 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1170 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1171 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1172 158142c2 bellard
*----------------------------------------------------------------------------*/
1173 158142c2 bellard
1174 c4850f9e Peter Maydell
float32 int32_to_float32(int32_t a STATUS_PARAM)
{
    flag zSign;
    uint32_t magnitude;

    if (a == 0) {
        return float32_zero;
    }
    /* The most negative value cannot be negated as an int32_t, so it is
     * packed directly. */
    if (a == (int32_t) 0x80000000) {
        return packFloat32(1, 0x9E, 0);
    }

    zSign = (a < 0);
    magnitude = zSign ? -a : a;
    return normalizeRoundAndPackFloat32(zSign, 0x9C, magnitude STATUS_VAR);
}
1184 158142c2 bellard
1185 158142c2 bellard
/*----------------------------------------------------------------------------
1186 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1187 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1188 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1189 158142c2 bellard
*----------------------------------------------------------------------------*/
1190 158142c2 bellard
1191 c4850f9e Peter Maydell
float64 int32_to_float64(int32_t a STATUS_PARAM)
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 32-bit integer. */
    absA = zSign ? - (uint32_t) a : (uint32_t) a;
    /* Shift that moves the leading 1 of absA up to bit 52 of zSig. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    /* The value is packed directly, without a rounding step. */
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1206 158142c2 bellard
1207 158142c2 bellard
/*----------------------------------------------------------------------------
1208 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1209 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1210 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1211 158142c2 bellard
| Arithmetic.
1212 158142c2 bellard
*----------------------------------------------------------------------------*/
1213 158142c2 bellard
1214 c4850f9e Peter Maydell
floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 32-bit integer. */
    absA = zSign ? - (uint32_t) a : (uint32_t) a;
    /* Shift that moves the leading 1 of absA up to bit 63 of zSig. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    /* The value is packed directly, without a rounding step. */
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1229 158142c2 bellard
1230 158142c2 bellard
/*----------------------------------------------------------------------------
1231 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1232 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1233 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1234 158142c2 bellard
*----------------------------------------------------------------------------*/
1235 158142c2 bellard
1236 c4850f9e Peter Maydell
float128 int32_to_float128(int32_t a STATUS_PARAM)
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 32-bit integer. */
    absA = zSign ? - (uint32_t) a : (uint32_t) a;
    /* Shift that moves the leading 1 of absA up to bit 48 of zSig0. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    /* The value is packed directly, without a rounding step. */
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1251 158142c2 bellard
1252 158142c2 bellard
/*----------------------------------------------------------------------------
1253 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1254 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1255 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1256 158142c2 bellard
*----------------------------------------------------------------------------*/
1257 158142c2 bellard
1258 c4850f9e Peter Maydell
float32 int64_to_float32(int64_t a STATUS_PARAM)
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 64-bit integer. */
    absA = zSign ? - (uint64_t) a : (uint64_t) a;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At least 40 leading zeros: the magnitude fits in 24 bits and is
         * packed directly, without a rounding step. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* Re-bias the shift for roundAndPackFloat32; a still-negative count
         * means bits must be dropped, which is done with jamming so that
         * the discarded bits still influence rounding. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1283 158142c2 bellard
1284 c4850f9e Peter Maydell
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  Values that need more than 24
| significant bits are rounded by `roundAndPackFloat32'.
*----------------------------------------------------------------------------*/
float32 uint64_to_float32(uint64_t a STATUS_PARAM)
{
    int8 lshift;

    if (a == 0) {
        return float32_zero;
    }

    lshift = countLeadingZeros64(a) - 40;
    if (lshift >= 0) {
        /* At most 24 significant bits: packed directly, no rounding step. */
        return packFloat32(0, 0x95 - lshift, a << lshift);
    }

    /* Re-bias the shift for roundAndPackFloat32. */
    lshift += 7;
    if (lshift >= 0) {
        a <<= lshift;
    } else {
        /* Bits have to be dropped; jam them so rounding still sees them. */
        shift64RightJamming(a, -lshift, &a);
    }
    return roundAndPackFloat32(0, 0x9C - lshift, a STATUS_VAR);
}
1304 75d62a58 j_mayer
1305 158142c2 bellard
/*----------------------------------------------------------------------------
1306 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1307 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1308 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1309 158142c2 bellard
*----------------------------------------------------------------------------*/
1310 158142c2 bellard
1311 c4850f9e Peter Maydell
float64 int64_to_float64(int64_t a STATUS_PARAM)
{
    flag zSign;
    uint64_t magnitude;

    if (a == 0) {
        return float64_zero;
    }
    /* The most negative value cannot be negated as an int64_t, so it is
     * packed directly. */
    if (a == (int64_t) LIT64(0x8000000000000000)) {
        return packFloat64(1, 0x43E, 0);
    }

    zSign = (a < 0);
    magnitude = zSign ? -a : a;
    return normalizeRoundAndPackFloat64(zSign, 0x43C, magnitude STATUS_VAR);
}
1323 158142c2 bellard
1324 c4850f9e Peter Maydell
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format, rounding via
| `normalizeRoundAndPackFloat64'.
*----------------------------------------------------------------------------*/
float64 uint64_to_float64(uint64_t a STATUS_PARAM)
{
    int zExp = 0x43C;

    if (a == 0) {
        return float64_zero;
    }
    /* When bit 63 is set, shift right by one first (jamming the lost bit
     * so rounding still sees it) and compensate in the exponent. */
    if (a >> 63) {
        shift64RightJamming(a, 1, &a);
        zExp++;
    }
    return normalizeRoundAndPackFloat64(0, zExp, a STATUS_VAR);
}
1337 75d62a58 j_mayer
1338 158142c2 bellard
/*----------------------------------------------------------------------------
1339 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1340 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1341 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1342 158142c2 bellard
| Arithmetic.
1343 158142c2 bellard
*----------------------------------------------------------------------------*/
1344 158142c2 bellard
1345 c4850f9e Peter Maydell
floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 64-bit integer. */
    absA = zSign ? - (uint64_t) a : (uint64_t) a;
    /* Normalise so that the leading 1 sits in bit 63 of the significand. */
    shiftCount = countLeadingZeros64( absA );
    /* The value is packed directly, without a rounding step. */
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1358 158142c2 bellard
1359 158142c2 bellard
/*----------------------------------------------------------------------------
1360 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1361 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1362 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1363 158142c2 bellard
*----------------------------------------------------------------------------*/
1364 158142c2 bellard
1365 c4850f9e Peter Maydell
float128 int64_to_float128(int64_t a STATUS_PARAM)
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    uint64_t zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Negate in unsigned arithmetic: `- a' on the signed type overflows
     * (undefined behaviour) when `a' is the most negative 64-bit integer. */
    absA = zSign ? - (uint64_t) a : (uint64_t) a;
    /* Total left shift, counted over the 128-bit zSig0:zSig1 pair with absA
     * starting in the low word. */
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A shift of 64 or more is done by placing absA in the high word
         * and shifting by the remainder. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    /* The value is packed directly, without a rounding step. */
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1391 158142c2 bellard
1392 c4850f9e Peter Maydell
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| quadruple-precision floating-point format.
*----------------------------------------------------------------------------*/
float128 uint64_to_float128(uint64_t a STATUS_PARAM)
{
    if (a != 0) {
        /* `a' is handed over as the high significand word; the helper
         * normalises it against the 0x406E exponent. */
        return normalizeRoundAndPackFloat128(0, 0x406E, a, 0 STATUS_VAR);
    }
    return float128_zero;
}
1399 1e397ead Richard Henderson
1400 158142c2 bellard
/*----------------------------------------------------------------------------
1401 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1402 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1403 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1404 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1405 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1406 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1407 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1408 158142c2 bellard
*----------------------------------------------------------------------------*/
1409 158142c2 bellard
1410 158142c2 bellard
int32 float32_to_int32(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, rshift;
    uint32_t aSig;
    uint64_t aSig64;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);

    /* NaN: drop the sign so the result is handled as a positive value. */
    if ((aExp == 0xFF) && aSig) {
        aSign = 0;
    }
    /* Non-zero exponent field: restore the implicit leading 1. */
    if (aExp) {
        aSig |= 0x00800000;
    }

    aSig64 = (uint64_t) aSig << 32;
    rshift = 0xAF - aExp;
    if (rshift > 0) {
        shift64RightJamming(aSig64, rshift, &aSig64);
    }
    return roundAndPackInt32(aSign, aSig64 STATUS_VAR);
}
1430 158142c2 bellard
1431 158142c2 bellard
/*----------------------------------------------------------------------------
1432 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1433 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1434 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1435 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1436 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1437 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1438 158142c2 bellard
| returned.
1439 158142c2 bellard
*----------------------------------------------------------------------------*/
1440 158142c2 bellard
1441 158142c2 bellard
int32 float32_to_int32_round_to_zero(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, shiftCount;
    uint32_t aSig;
    int32_t z;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);
    shiftCount = aExp - 0x9E;

    if (shiftCount >= 0) {
        /* Exponent field of 0x9E or more.  The only such input accepted
         * without raising invalid is the bit pattern 0xCF000000. */
        if (float32_val(a) == 0xCF000000) {
            return (int32_t) 0x80000000;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        if (!aSign || ((aExp == 0xFF) && aSig)) {
            /* Positive overflow or NaN. */
            return 0x7FFFFFFF;
        }
        return (int32_t) 0x80000000;
    }

    if (aExp <= 0x7E) {
        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */
        if (aExp | aSig) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Put the significand, with its implicit 1, at the top of the word. */
    aSig = (aSig | 0x00800000) << 8;
    z = aSig >> (-shiftCount);
    /* Any bit shifted out on the right makes the conversion inexact. */
    if ((uint32_t) (aSig << (shiftCount & 31))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? -z : z;
}
1473 158142c2 bellard
1474 158142c2 bellard
/*----------------------------------------------------------------------------
1475 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1476 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
1477 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1478 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
1479 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1480 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
1481 cbcef455 Peter Maydell
| returned.
1482 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
1483 cbcef455 Peter Maydell
1484 94a49d86 Andreas Färber
int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, shiftCount;
    uint32_t aSig;
    int32 z;

    /* Treat denormal inputs like the other float32-to-integer conversions
     * in this file do. */
    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    shiftCount = aExp - 0x8E;
    if ( 0 <= shiftCount ) {
        /* Exponent field of 0x8E or more: magnitude >= 32768, infinity or
         * NaN.  With exponent 0x8E the integer part is
         * 32768 + ( aSig >> 8 ), so a negative input with aSig < 0x100
         * truncates to exactly -32768, which is representable: that is not
         * an invalid operation, only inexact when fraction bits are lost. */
        if ( aSign && ( aExp == 0x8E ) && ( aSig < 0x100 ) ) {
            if ( aSig ) {
                STATUS(float_exception_flags) |= float_flag_inexact;
            }
            return (int32_t) 0xffff8000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
            /* Positive overflow or NaN. */
            return 0x7FFF;
        }
        return (int32_t) 0xffff8000;
    }
    else if ( aExp <= 0x7E ) {
        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */
        if ( aExp | aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* The significand is held at the top of a 32-bit word, so the shift
     * is 16 positions larger than for a 16-bit result. */
    shiftCount -= 0x10;
    aSig = ( aSig | 0x00800000 )<<8;
    z = aSig>>( - shiftCount );
    /* Any bit shifted out on the right makes the conversion inexact. */
    if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if ( aSign ) {
        z = - z;
    }
    return z;

}
1522 cbcef455 Peter Maydell
1523 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
1524 cbcef455 Peter Maydell
| Returns the result of converting the single-precision floating-point value
1525 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1526 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1527 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1528 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1529 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1530 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1531 158142c2 bellard
*----------------------------------------------------------------------------*/
1532 158142c2 bellard
1533 158142c2 bellard
int64 float32_to_int64(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, rshift;
    uint32_t aSig;
    uint64_t aSig64, aSigExtra;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);

    rshift = 0xBE - aExp;
    if (rshift < 0) {
        /* Exponent field above 0xBE: too large, infinity, or NaN. */
        float_raise(float_flag_invalid STATUS_VAR);
        if (aSign && !((aExp == 0xFF) && aSig)) {
            /* Negative and not a NaN. */
            return (int64_t) LIT64(0x8000000000000000);
        }
        return LIT64(0x7FFFFFFFFFFFFFFF);
    }

    /* Non-zero exponent field: restore the implicit leading 1. */
    if (aExp) {
        aSig |= 0x00800000;
    }
    aSig64 = (uint64_t) aSig << 40;
    shift64ExtraRightJamming(aSig64, 0, rshift, &aSig64, &aSigExtra);
    return roundAndPackInt64(aSign, aSig64, aSigExtra STATUS_VAR);
}
1559 158142c2 bellard
1560 158142c2 bellard
/*----------------------------------------------------------------------------
1561 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1562 2f18bbf9 Tom Musta
| `a' to the 64-bit unsigned integer format.  The conversion is
1563 2f18bbf9 Tom Musta
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1564 2f18bbf9 Tom Musta
| Arithmetic---which means in particular that the conversion is rounded
1565 2f18bbf9 Tom Musta
| according to the current rounding mode.  If `a' is a NaN, the largest
1566 2f18bbf9 Tom Musta
| unsigned integer is returned.  Otherwise, if the conversion overflows, the
1567 2f18bbf9 Tom Musta
| largest unsigned integer is returned.  If `a' is negative, the result
1568 2f18bbf9 Tom Musta
| is rounded and zero is returned; values that do not round to zero will
1569 2f18bbf9 Tom Musta
| raise the inexact exception flag.
1570 2f18bbf9 Tom Musta
*----------------------------------------------------------------------------*/
1571 2f18bbf9 Tom Musta
1572 2f18bbf9 Tom Musta
uint64 float32_to_uint64(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, rshift;
    uint32_t aSig;
    uint64_t aSig64, aSigExtra;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);

    /* Sign bit set and exponent field above 126 (magnitude of at least 1,
     * infinity, or NaN): always invalid. */
    if (aSign && (aExp > 126)) {
        float_raise(float_flag_invalid STATUS_VAR);
        if (float32_is_any_nan(a)) {
            return LIT64(0xFFFFFFFFFFFFFFFF);
        }
        return 0;
    }

    rshift = 0xBE - aExp;
    if (rshift < 0) {
        /* Exponent field above 0xBE: too large, infinity, or NaN. */
        float_raise(float_flag_invalid STATUS_VAR);
        return LIT64(0xFFFFFFFFFFFFFFFF);
    }

    /* Non-zero exponent field: restore the implicit leading 1. */
    if (aExp) {
        aSig |= 0x00800000;
    }
    aSig64 = (uint64_t) aSig << 40;
    shift64ExtraRightJamming(aSig64, 0, rshift, &aSig64, &aSigExtra);
    return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
}
1605 2f18bbf9 Tom Musta
1606 2f18bbf9 Tom Musta
/*----------------------------------------------------------------------------
1607 2f18bbf9 Tom Musta
| Returns the result of converting the single-precision floating-point value
1608 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1609 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1610 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1611 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1612 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1613 158142c2 bellard
| returned.
1614 158142c2 bellard
*----------------------------------------------------------------------------*/
1615 158142c2 bellard
1616 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int_fast16_t biasedExp, shiftCount;
    uint32_t frac;
    uint64_t sig;
    int64 result;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac(a);
    biasedExp = extractFloat32Exp(a);
    sign = extractFloat32Sign(a);

    /* 0xBE is the biased exponent of 2^63. */
    shiftCount = biasedExp - 0xBE;
    if (shiftCount >= 0) {
        /* 0xDF000000 encodes exactly -2^63, the one value in this range
         * that is representable; it converts without raising anything. */
        if (float32_val(a) == 0xDF000000) {
            return (int64_t) LIT64(0x8000000000000000);
        }
        float_raise(float_flag_invalid STATUS_VAR);
        /* Positive overflow and NaNs saturate to the largest positive
         * integer, negative overflow to the most negative one. */
        if (!sign || ((biasedExp == 0xFF) && frac)) {
            return LIT64(0x7FFFFFFFFFFFFFFF);
        }
        return (int64_t) LIT64(0x8000000000000000);
    }

    /* Magnitude below one truncates to zero; inexact unless a is zero. */
    if (biasedExp <= 0x7E) {
        if (biasedExp | frac) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Left-justify the significand (implicit bit included) and shift it
     * down into integer position; shiftCount is negative here. */
    sig = frac | 0x00800000;
    sig <<= 40;
    result = sig >> (-shiftCount);
    /* Any bit dropped by the shift above makes the conversion inexact. */
    if ((uint64_t) (sig << (shiftCount & 63))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? -result : result;
}
1652 158142c2 bellard
1653 158142c2 bellard
/*----------------------------------------------------------------------------
1654 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1655 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1656 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1657 158142c2 bellard
| Arithmetic.
1658 158142c2 bellard
*----------------------------------------------------------------------------*/
1659 158142c2 bellard
1660 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag sign;
    int_fast16_t biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac(a);
    biasedExp = extractFloat32Exp(a);
    sign = extractFloat32Sign(a);

    /* Maximum exponent: NaN when the fraction is non-zero, else infinity. */
    if (biasedExp == 0xFF) {
        if (frac != 0) {
            return commonNaNToFloat64(float32ToCommonNaN(a STATUS_VAR)
                                      STATUS_VAR);
        }
        return packFloat64(sign, 0x7FF, 0);
    }

    /* Zero exponent: signed zero, or a subnormal that gets normalized. */
    if (biasedExp == 0) {
        if (frac == 0) {
            return packFloat64(sign, 0, 0);
        }
        normalizeFloat32Subnormal(frac, &biasedExp, &frac);
        /* NOTE(review): presumably offsets the leading significand bit
         * left set by the normalization, which would carry into the
         * exponent field when packed -- confirm against packFloat64. */
        --biasedExp;
    }

    /* Rebias the exponent (0x380 == 0x3FF - 0x7F) and widen the fraction
     * from 23 to 52 bits. */
    return packFloat64(sign, biasedExp + 0x380, ((uint64_t) frac) << 29);
}
1682 158142c2 bellard
1683 158142c2 bellard
/*----------------------------------------------------------------------------
1684 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1685 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1686 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1687 158142c2 bellard
| Arithmetic.
1688 158142c2 bellard
*----------------------------------------------------------------------------*/
1689 158142c2 bellard
1690 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    flag sign;
    int_fast16_t biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac(a);
    biasedExp = extractFloat32Exp(a);
    sign = extractFloat32Sign(a);

    /* Maximum exponent: NaN when the fraction is non-zero, else infinity. */
    if (biasedExp == 0xFF) {
        if (frac != 0) {
            return commonNaNToFloatx80(float32ToCommonNaN(a STATUS_VAR)
                                       STATUS_VAR);
        }
        return packFloatx80(sign, 0x7FFF, LIT64(0x8000000000000000));
    }

    /* Zero exponent: signed zero, or a subnormal that gets normalized. */
    if (biasedExp == 0) {
        if (frac == 0) {
            return packFloatx80(sign, 0, 0);
        }
        normalizeFloat32Subnormal(frac, &biasedExp, &frac);
    }

    /* The result significand is passed with the leading one set
     * explicitly (bit 63 after the shift); the exponent is rebiased
     * by 0x3F80 == 0x3FFF - 0x7F. */
    frac |= 0x00800000;
    return packFloatx80(sign, biasedExp + 0x3F80, ((uint64_t) frac) << 40);
}
1712 158142c2 bellard
1713 158142c2 bellard
/*----------------------------------------------------------------------------
1714 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1715 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1716 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1717 158142c2 bellard
| Arithmetic.
1718 158142c2 bellard
*----------------------------------------------------------------------------*/
1719 158142c2 bellard
1720 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign;
    int_fast16_t biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac(a);
    biasedExp = extractFloat32Exp(a);
    sign = extractFloat32Sign(a);

    /* Maximum exponent: NaN when the fraction is non-zero, else infinity. */
    if (biasedExp == 0xFF) {
        if (frac != 0) {
            return commonNaNToFloat128(float32ToCommonNaN(a STATUS_VAR)
                                       STATUS_VAR);
        }
        return packFloat128(sign, 0x7FFF, 0, 0);
    }

    /* Zero exponent: signed zero, or a subnormal that gets normalized. */
    if (biasedExp == 0) {
        if (frac == 0) {
            return packFloat128(sign, 0, 0, 0);
        }
        normalizeFloat32Subnormal(frac, &biasedExp, &frac);
        /* NOTE(review): presumably offsets the leading significand bit
         * left set by the normalization, which would carry into the
         * exponent field when packed -- confirm against packFloat128. */
        --biasedExp;
    }

    /* Rebias the exponent (0x3F80 == 0x3FFF - 0x7F); the fraction goes
     * into the high significand word, the low word is zero. */
    return packFloat128(sign, biasedExp + 0x3F80, ((uint64_t) frac) << 25, 0);
}
1742 158142c2 bellard
1743 158142c2 bellard
/*----------------------------------------------------------------------------
1744 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1745 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1746 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1747 158142c2 bellard
| Floating-Point Arithmetic.
1748 158142c2 bellard
*----------------------------------------------------------------------------*/
1749 158142c2 bellard
1750 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag sign;
    int_fast16_t biasedExp;
    uint32_t unitBit, fracMask;
    int8 mode;
    uint32_t bits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    biasedExp = extractFloat32Exp(a);

    /* Biased exponent 0x96 (2^23) and above: no fraction bits remain, so
     * the value is already integral.  NaNs go through propagation. */
    if (biasedExp >= 0x96) {
        if ((biasedExp == 0xFF) && extractFloat32Frac(a)) {
            return propagateFloat32NaN(a, a STATUS_VAR);
        }
        return a;
    }

    /* Magnitude below one: the result is a signed zero or a signed one. */
    if (biasedExp <= 0x7E) {
        /* Shifting out the sign bit leaves zero only for +0 and -0. */
        if ((uint32_t) (float32_val(a) << 1) == 0) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat32Sign(a);
        switch (STATUS(float_rounding_mode)) {
        case float_round_nearest_even:
            /* Strictly above one half rounds away to +/-1. */
            if ((biasedExp == 0x7E) && extractFloat32Frac(a)) {
                return packFloat32(sign, 0x7F, 0);
            }
            break;
        case float_round_down:
            /* -1.0 for negative inputs, +0 otherwise. */
            return make_float32(sign ? 0xBF800000 : 0);
        case float_round_up:
            /* -0 for negative inputs, +1.0 otherwise. */
            return make_float32(sign ? 0x80000000 : 0x3F800000);
        default:
            break;
        }
        return packFloat32(sign, 0, 0);
    }

    /* unitBit marks the raw-encoding bit that weighs one; fracMask covers
     * every bit below it. */
    unitBit = (uint32_t) 1 << (0x96 - biasedExp);
    fracMask = unitBit - 1;
    bits = float32_val(a);
    mode = STATUS(float_rounding_mode);
    if (mode == float_round_nearest_even) {
        bits += unitBit >> 1;
        /* An exact tie leaves the fraction bits zero: force an even unit. */
        if ((bits & fracMask) == 0) {
            bits &= ~unitBit;
        }
    } else if (mode != float_round_to_zero) {
        /* Directed rounding: bump the magnitude only when rounding away
         * from zero for this sign. */
        if (extractFloat32Sign(make_float32(bits)) ^ (mode == float_round_up)) {
            bits += fracMask;
        }
    }
    bits &= ~fracMask;
    if (bits != float32_val(a)) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float32(bits);
}
1802 158142c2 bellard
1803 158142c2 bellard
/*----------------------------------------------------------------------------
1804 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1805 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1806 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1807 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1808 158142c2 bellard
| Floating-Point Arithmetic.
1809 158142c2 bellard
*----------------------------------------------------------------------------*/
1810 158142c2 bellard
1811 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig, zSig;
    int_fast16_t expDiff;

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);
    expDiff = aExp - bExp;
    /* Make room for six extra low-order bits below each fraction. */
    aSig <<= 6;
    bSig <<= 6;

    if (expDiff == 0) {
        /* Equal exponents: no alignment shift is required. */
        if (aExp == 0xFF) {
            if (aSig | bSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (aExp == 0) {
            /* Both operands are zero or subnormal. */
            if (STATUS(flush_to_zero)) {
                if (aSig | bSig) {
                    float_raise(float_flag_output_denormal STATUS_VAR);
                }
                return packFloat32(zSign, 0, 0);
            }
            return packFloat32(zSign, 0, (aSig + bSig) >> 6);
        }
        /* 0x40000000 is the sum of the two implicit leading ones. */
        return roundAndPackFloat32(zSign, aExp, 0x40000000 + aSig + bSig
                                   STATUS_VAR);
    }

    if (expDiff > 0) {
        /* a has the larger exponent: align b to it. */
        if (aExp == 0xFF) {
            if (aSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (bExp == 0) {
            --expDiff;
        } else {
            bSig |= 0x20000000;
        }
        shift32RightJamming(bSig, expDiff, &bSig);
        zExp = aExp;
    } else {
        /* b has the larger exponent: align a to it. */
        if (bExp == 0xFF) {
            if (bSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign, 0xFF, 0);
        }
        if (aExp == 0) {
            ++expDiff;
        } else {
            aSig |= 0x20000000;
        }
        shift32RightJamming(aSig, -expDiff, &aSig);
        zExp = bExp;
    }

    /* The larger operand still lacks its leading one.  It is OR'ed into
     * aSig in both cases; only the sum aSig + bSig is used below. */
    aSig |= 0x20000000;
    zSig = (aSig + bSig) << 1;
    --zExp;
    /* If the shifted sum reached bit 31, use the unshifted sum and the
     * original exponent instead. */
    if ((int32_t) zSig < 0) {
        zSig = aSig + bSig;
        ++zExp;
    }
    return roundAndPackFloat32(zSign, zExp, zSig STATUS_VAR);
}
1881 158142c2 bellard
1882 158142c2 bellard
/*----------------------------------------------------------------------------
1883 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1884 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1885 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1886 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1887 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1888 158142c2 bellard
*----------------------------------------------------------------------------*/
1889 158142c2 bellard
1890 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig, zSig;
    int_fast16_t expDiff;
    int aIsLarger;

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);
    expDiff = aExp - bExp;
    /* Make room for seven extra low-order bits below each fraction. */
    aSig <<= 7;
    bSig <<= 7;

    if (expDiff > 0) {
        /* a has the larger exponent: align b to it. */
        if (aExp == 0xFF) {
            if (aSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (bExp == 0) {
            --expDiff;
        } else {
            bSig |= 0x40000000;
        }
        shift32RightJamming(bSig, expDiff, &bSig);
        aSig |= 0x40000000;
        aIsLarger = 1;
    } else if (expDiff < 0) {
        /* b has the larger exponent: align a to it. */
        if (bExp == 0xFF) {
            if (bSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Finite minus infinity: infinity of the opposite sign. */
            return packFloat32(zSign ^ 1, 0xFF, 0);
        }
        if (aExp == 0) {
            ++expDiff;
        } else {
            aSig |= 0x40000000;
        }
        shift32RightJamming(aSig, -expDiff, &aSig);
        bSig |= 0x40000000;
        aIsLarger = 0;
    } else {
        /* Equal exponents: the fractions alone decide the ordering. */
        if (aExp == 0xFF) {
            if (aSig | bSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Infinity minus infinity. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        if (aExp == 0) {
            aExp = 1;
            bExp = 1;
        }
        if (aSig == bSig) {
            /* Exact cancellation: the zero is negative only when
             * rounding down. */
            return packFloat32(STATUS(float_rounding_mode) == float_round_down,
                               0, 0);
        }
        aIsLarger = (bSig < aSig);
    }

    if (aIsLarger) {
        zSig = aSig - bSig;
        zExp = aExp;
    } else {
        /* Subtract the other way round and flip the result sign. */
        zSig = bSig - aSig;
        zExp = bExp;
        zSign ^= 1;
    }
    --zExp;
    return normalizeRoundAndPackFloat32(zSign, zExp, zSig STATUS_VAR);
}
1956 158142c2 bellard
1957 158142c2 bellard
/*----------------------------------------------------------------------------
1958 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1959 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1960 158142c2 bellard
| Binary Floating-Point Arithmetic.
1961 158142c2 bellard
*----------------------------------------------------------------------------*/
1962 158142c2 bellard
1963 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);

    /* Opposite signs: the sum is a difference of magnitudes. */
    if (aSign != bSign) {
        return subFloat32Sigs(a, b, aSign STATUS_VAR);
    }
    return addFloat32Sigs(a, b, aSign STATUS_VAR);
}
1979 158142c2 bellard
1980 158142c2 bellard
/*----------------------------------------------------------------------------
1981 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1982 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1983 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1984 158142c2 bellard
*----------------------------------------------------------------------------*/
1985 158142c2 bellard
1986 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);

    /* Opposite signs: the difference is a sum of magnitudes. */
    if (aSign != bSign) {
        return addFloat32Sigs(a, b, aSign STATUS_VAR);
    }
    return subFloat32Sigs(a, b, aSign STATUS_VAR);
}
2002 158142c2 bellard
2003 158142c2 bellard
/*----------------------------------------------------------------------------
2004 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
2005 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2006 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2007 158142c2 bellard
*----------------------------------------------------------------------------*/
2008 158142c2 bellard
2009 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign, zSign;
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig;
    uint64_t product;
    uint32_t zSig;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);
    bSign = extractFloat32Sign(b);
    zSign = aSign ^ bSign;

    /* a is infinity or NaN. */
    if (aExp == 0xFF) {
        if (aSig || ((bExp == 0xFF) && bSig)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Infinity times zero. */
        if ((bExp | bSig) == 0) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(zSign, 0xFF, 0);
    }

    /* b is infinity or NaN (a is finite here). */
    if (bExp == 0xFF) {
        if (bSig) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Zero times infinity. */
        if ((aExp | aSig) == 0) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(zSign, 0xFF, 0);
    }

    /* A zero operand gives a signed zero; subnormals are normalized. */
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloat32(zSign, 0, 0);
        }
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    }
    if (bExp == 0) {
        if (bSig == 0) {
            return packFloat32(zSign, 0, 0);
        }
        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    }

    zExp = aExp + bExp - 0x7F;
    aSig = (aSig | 0x00800000) << 7;
    bSig = (bSig | 0x00800000) << 8;
    /* Keep the upper 32 bits of the 64-bit product, jamming any lost
     * low-order bits into the bottom bit. */
    shift64RightJamming(((uint64_t) aSig) * bSig, 32, &product);
    zSig = product;
    /* Bit 30 clear: shift the significand up one place and adjust the
     * exponent to match. */
    if ((int32_t) (zSig << 1) >= 0) {
        zSig <<= 1;
        --zExp;
    }
    return roundAndPackFloat32(zSign, zExp, zSig STATUS_VAR);
}
2065 158142c2 bellard
2066 158142c2 bellard
/*----------------------------------------------------------------------------
2067 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
2068 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
2069 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2070 158142c2 bellard
*----------------------------------------------------------------------------*/
2071 158142c2 bellard
2072 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign, zSign;
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig, zSig;
    uint64_t dividend;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);
    bSign = extractFloat32Sign(b);
    zSign = aSign ^ bSign;

    /* a is infinity or NaN. */
    if (aExp == 0xFF) {
        if (aSig) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if (bExp == 0xFF) {
            if (bSig) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Infinity divided by infinity. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(zSign, 0xFF, 0);
    }

    /* b is infinity or NaN (a is finite here): finite / inf is zero. */
    if (bExp == 0xFF) {
        if (bSig) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return packFloat32(zSign, 0, 0);
    }

    if (bExp == 0) {
        if (bSig == 0) {
            /* Zero divisor: 0/0 is invalid, anything else is a
             * divide-by-zero yielding a signed infinity. */
            if ((aExp | aSig) == 0) {
                float_raise(float_flag_invalid STATUS_VAR);
                return float32_default_nan;
            }
            float_raise(float_flag_divbyzero STATUS_VAR);
            return packFloat32(zSign, 0xFF, 0);
        }
        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloat32(zSign, 0, 0);
        }
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    }

    zExp = aExp - bExp + 0x7D;
    aSig = (aSig | 0x00800000) << 7;
    bSig = (bSig | 0x00800000) << 8;
    /* Halve the dividend when needed so the quotient fits in 32 bits. */
    if (bSig <= (aSig + aSig)) {
        aSig >>= 1;
        ++zExp;
    }
    dividend = ((uint64_t) aSig) << 32;
    zSig = dividend / bSig;
    /* When the low six quotient bits are all zero, set the bottom bit if
     * the division left a remainder. */
    if ((zSig & 0x3F) == 0) {
        zSig |= ((uint64_t) bSig * zSig != dividend);
    }
    return roundAndPackFloat32(zSign, zExp, zSig STATUS_VAR);
}
2129 158142c2 bellard
2130 158142c2 bellard
/*----------------------------------------------------------------------------
2131 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
2132 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2133 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2134 158142c2 bellard
*----------------------------------------------------------------------------*/
2135 158142c2 bellard
2136 158142c2 bellard
float32 float32_rem(float32 a, float32 b STATUS_PARAM)
{
    flag aSign, zSign;
    int_fast16_t aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t quot;
    uint64_t aSig64, bSig64, quot64;
    uint32_t prevSig;
    int32_t sigSum;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);

    /* Exponent 0xFF: `a' is a NaN (non-zero fraction) or an infinity. */
    if (aExp == 0xFF) {
        if (aSig || ((bExp == 0xFF) && bSig)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Remainder of an infinity: invalid operation. */
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    /* `a' is finite here; `b' is a NaN or an infinity. */
    if (bExp == 0xFF) {
        if (bSig) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
            /* Remainder with respect to zero: invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    }

    expDiff = aExp - bExp;
    /* Restore the implicit leading one of both significands. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;

    if (expDiff < 32) {
        /* The whole quotient fits in 32 bits: one division is enough. */
        aSig <<= 8;
        bSig <<= 8;
        if (expDiff < 0) {
            if (expDiff < -1) {
                return a;
            }
            aSig >>= 1;
        }
        quot = (bSig <= aSig);
        if (quot) {
            aSig -= bSig;
        }
        if (expDiff > 0) {
            quot = (((uint64_t) aSig) << 32) / bSig;
            quot >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ((aSig >> 1) << (expDiff - 1)) - bSig * quot;
        } else {
            aSig >>= 2;
            bSig >>= 2;
        }
    } else {
        /* Large exponent gap: reduce 62 quotient bits per iteration. */
        if (bSig <= aSig) {
            aSig -= bSig;
        }
        aSig64 = ((uint64_t) aSig) << 40;
        bSig64 = ((uint64_t) bSig) << 40;
        expDiff -= 64;
        while (expDiff > 0) {
            quot64 = estimateDiv128To64(aSig64, 0, bSig64);
            /* Back the estimate off by two, clamping at zero. */
            if (quot64 > 2) {
                quot64 -= 2;
            } else {
                quot64 = 0;
            }
            aSig64 = -((bSig * quot64) << 38);
            expDiff -= 62;
        }
        expDiff += 64;
        quot64 = estimateDiv128To64(aSig64, 0, bSig64);
        if (quot64 > 2) {
            quot64 -= 2;
        } else {
            quot64 = 0;
        }
        quot = quot64 >> (64 - expDiff);
        bSig <<= 6;
        aSig = ((aSig64 >> 33) << (expDiff - 1)) - bSig * quot;
    }

    /*
     * Keep subtracting the divisor until the remainder turns negative,
     * remembering the last value seen before each subtraction.
     */
    do {
        prevSig = aSig;
        ++quot;
        aSig -= bSig;
    } while ((int32_t) aSig >= 0);

    /*
     * Choose between the negative remainder and the previous one using the
     * sign of their sum; on an exact tie an odd quotient count selects the
     * previous value.
     */
    sigSum = aSig + prevSig;
    if ((sigSum < 0) || ((sigSum == 0) && (quot & 1))) {
        aSig = prevSig;
    }

    zSign = ((int32_t) aSig < 0);
    if (zSign) {
        aSig = -aSig;
    }
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig STATUS_VAR);
}
2230 158142c2 bellard
2231 158142c2 bellard
/*----------------------------------------------------------------------------
2232 369be8f6 Peter Maydell
| Returns the result of multiplying the single-precision floating-point values
2233 369be8f6 Peter Maydell
| `a' and `b' then adding 'c', with no intermediate rounding step after the
2234 369be8f6 Peter Maydell
| multiplication.  The operation is performed according to the IEC/IEEE
2235 369be8f6 Peter Maydell
| Standard for Binary Floating-Point Arithmetic 754-2008.
2236 369be8f6 Peter Maydell
| The flags argument allows the caller to select negation of the
2237 369be8f6 Peter Maydell
| addend, the intermediate product, or the final result. (The difference
2238 369be8f6 Peter Maydell
| between this and having the caller do a separate negation is that negating
2239 369be8f6 Peter Maydell
| externally will flip the sign bit on NaNs.)
2240 369be8f6 Peter Maydell
*----------------------------------------------------------------------------*/
2241 369be8f6 Peter Maydell
2242 369be8f6 Peter Maydell
float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
{
    flag aSign, bSign, cSign, zSign;
    flag pSign, pInf, pZero;
    flag negResult, infTimesZero;
    int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
    uint32_t aSig, bSig, cSig, pSig32;
    uint64_t pSig64, cSig64, zSig64;
    int normShift;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    c = float32_squash_input_denormal(c STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);
    bSig = extractFloat32Frac(b);
    bExp = extractFloat32Exp(b);
    bSign = extractFloat32Sign(b);
    cSig = extractFloat32Frac(c);
    cExp = extractFloat32Exp(c);
    cSign = extractFloat32Sign(c);

    /* Set when the product is zero times infinity (in either order). */
    infTimesZero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
                    (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));

    /*
     * Whether (0, inf, qnan) and (inf, 0, qnan) raise InvalidOperation, and
     * which QNaN results, is implementation-defined; the target-specific
     * NaN selection routine is therefore told about the inf * 0 case.
     */
    if (((aExp == 0xff) && aSig) ||
        ((bExp == 0xff) && bSig) ||
        ((cExp == 0xff) && cSig)) {
        return propagateFloat32MulAddNaN(a, b, c, infTimesZero STATUS_VAR);
    }

    if (infTimesZero) {
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }

    if (flags & float_muladd_negate_c) {
        cSign ^= 1;
    }

    negResult = (flags & float_muladd_negate_result) != 0;

    /* Sign and class (infinite / zero) of the intermediate product. */
    pSign = aSign ^ bSign;
    if (flags & float_muladd_negate_product) {
        pSign ^= 1;
    }
    pInf = (aExp == 0xff) || (bExp == 0xff);
    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);

    if (cExp == 0xff) {
        /* The addend is an infinity. */
        if (pInf && (pSign ^ cSign)) {
            /* Infinities of opposite sign => InvalidOperation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        /* Otherwise the result is an infinity with the addend's sign. */
        return packFloat32(cSign ^ negResult, 0xff, 0);
    }

    if (pInf) {
        return packFloat32(pSign ^ negResult, 0xff, 0);
    }

    if (pZero) {
        if (cExp == 0 && cSig == 0) {
            /* Sum of two exact zeroes: the sign depends on rounding mode. */
            if (pSign == cSign) {
                zSign = pSign;
            } else {
                zSign = (STATUS(float_rounding_mode) == float_round_down);
            }
            return packFloat32(zSign ^ negResult, 0, 0);
        }
        if (cExp == 0 && STATUS(flush_to_zero)) {
            /* Exact zero plus a denormal, with output flushing enabled. */
            float_raise(float_flag_output_denormal STATUS_VAR);
            return packFloat32(cSign ^ negResult, 0, 0);
        }
        /* Zero plus a non-zero addend: the addend is the result. */
        return packFloat32(cSign ^ negResult, cExp, cSig);
    }

    if (aExp == 0) {
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    }
    if (bExp == 0) {
        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    }

    /*
     * Compute a * b.  The bias correction is 0x7e rather than the 0x7f used
     * by float32_mul() because the true exponent is wanted here, not the
     * "one-less-than" form that roundAndPackFloat32() expects.
     */
    pExp = aExp + bExp - 0x7e;
    aSig = (aSig | 0x00800000) << 7;
    bSig = (bSig | 0x00800000) << 8;
    pSig64 = (uint64_t)aSig * bSig;
    if ((int64_t)(pSig64 << 1) >= 0) {
        pSig64 <<= 1;
        pExp--;
    }

    zSign = pSign ^ negResult;

    /* pSig64 now holds the product with its explicit bit in position 62. */
    if (cExp == 0) {
        if (!cSig) {
            /* The addend is an exact zero: round the product and finish. */
            shift64RightJamming(pSig64, 32, &pSig64);
            pSig32 = pSig64;
            return roundAndPackFloat32(zSign, pExp - 1, pSig32 STATUS_VAR);
        }
        normalizeFloat32Subnormal(cSig, &cExp, &cSig);
    }

    /* Bring the addend to the same layout: explicit bit in position 62. */
    cSig64 = (uint64_t)cSig << (62 - 23);
    cSig64 |= LIT64(0x4000000000000000);
    expDiff = pExp - cExp;

    if (pSign == cSign) {
        /* Same signs: magnitudes add. */
        if (expDiff > 0) {
            /* Scale the addend down to the product's exponent. */
            shift64RightJamming(cSig64, expDiff, &cSig64);
            zExp = pExp;
        } else {
            if (expDiff < 0) {
                /* Scale the product down to the addend's exponent. */
                shift64RightJamming(pSig64, -expDiff, &pSig64);
            }
            zExp = cExp;
        }
        /* Sum, then move the explicit bit back to position 62. */
        zSig64 = pSig64 + cSig64;
        if ((int64_t)zSig64 < 0) {
            shift64RightJamming(zSig64, 1, &zSig64);
        } else {
            zExp--;
        }
    } else {
        /* Opposite signs: magnitudes subtract. */
        if (expDiff > 0) {
            shift64RightJamming(cSig64, expDiff, &cSig64);
            zSig64 = pSig64 - cSig64;
            zExp = pExp;
        } else if (expDiff < 0) {
            shift64RightJamming(pSig64, -expDiff, &pSig64);
            zSig64 = cSig64 - pSig64;
            zExp = cExp;
            zSign ^= 1;
        } else {
            zExp = pExp;
            if (cSig64 == pSig64) {
                /* Exact cancellation: signed zero per rounding mode. */
                zSign = negResult;
                if (STATUS(float_rounding_mode) == float_round_down) {
                    zSign ^= 1;
                }
                return packFloat32(zSign, 0, 0);
            }
            if (cSig64 < pSig64) {
                zSig64 = pSig64 - cSig64;
            } else {
                zSig64 = cSig64 - pSig64;
                zSign ^= 1;
            }
        }
        --zExp;
        /* Renormalize so the explicit bit is in position 62 again. */
        normShift = countLeadingZeros64(zSig64) - 1;
        zSig64 <<= normShift;
        zExp -= normShift;
    }

    shift64RightJamming(zSig64, 32, &zSig64);
    return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
}
2436 369be8f6 Peter Maydell
2437 369be8f6 Peter Maydell
2438 369be8f6 Peter Maydell
/*----------------------------------------------------------------------------
2439 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2440 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2441 158142c2 bellard
| Floating-Point Arithmetic.
2442 158142c2 bellard
*----------------------------------------------------------------------------*/
2443 158142c2 bellard
2444 158142c2 bellard
float32 float32_sqrt(float32 a STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp, zExp;
    uint32_t aSig, zSig;
    uint64_t residue, zSigSquared;

    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);

    /* NaN input (either sign): propagate it. */
    if ((aExp == 0xFF) && aSig) {
        return propagateFloat32NaN(a, float32_zero STATUS_VAR);
    }
    /* Negative input: only -0 is valid and is returned unchanged. */
    if (aSign) {
        if ((aExp | aSig) == 0) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    /* Positive infinity is returned unchanged. */
    if (aExp == 0xFF) {
        return a;
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return float32_zero;
        }
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    }

    zExp = ((aExp - 0x7F) >> 1) + 0x7E;
    aSig = (aSig | 0x00800000) << 8;
    zSig = estimateSqrt32(aExp, aSig) + 2;

    /* Only refine the estimate when its low seven bits are at most 5. */
    if ((zSig & 0x7F) <= 5) {
        if (zSig < 2) {
            /* The "+ 2" above wrapped around: use the largest value. */
            zSig = 0x7FFFFFFF;
            return roundAndPackFloat32(0, zExp, zSig STATUS_VAR);
        }
        aSig >>= aExp & 1;
        zSigSquared = ((uint64_t) zSig) * zSig;
        residue = (((uint64_t) aSig) << 32) - zSigSquared;
        /* Step the root down until the residue is no longer negative. */
        while ((int64_t) residue < 0) {
            --zSig;
            residue += (((uint64_t) zSig) << 1) | 1;
        }
        /* Record a non-zero residue in the low (sticky) bit. */
        zSig |= (residue != 0);
    }
    shift32RightJamming(zSig, 1, &zSig);
    return roundAndPackFloat32(0, zExp, zSig STATUS_VAR);
}
2492 158142c2 bellard
2493 158142c2 bellard
/*----------------------------------------------------------------------------
2494 8229c991 Aurelien Jarno
| Returns the binary exponential of the single-precision floating-point value
2495 8229c991 Aurelien Jarno
| `a'. The operation is performed according to the IEC/IEEE Standard for
2496 8229c991 Aurelien Jarno
| Binary Floating-Point Arithmetic.
2497 8229c991 Aurelien Jarno
|
2498 8229c991 Aurelien Jarno
| Uses the following identities:
2499 8229c991 Aurelien Jarno
|
2500 8229c991 Aurelien Jarno
| 1. -------------------------------------------------------------------------
2501 8229c991 Aurelien Jarno
|      x    x*ln(2)
2502 8229c991 Aurelien Jarno
|     2  = e
2503 8229c991 Aurelien Jarno
|
2504 8229c991 Aurelien Jarno
| 2. -------------------------------------------------------------------------
2505 8229c991 Aurelien Jarno
|                      2     3     4     5           n
2506 8229c991 Aurelien Jarno
|      x        x     x     x     x     x           x
2507 8229c991 Aurelien Jarno
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2508 8229c991 Aurelien Jarno
|               1!    2!    3!    4!    5!          n!
2509 8229c991 Aurelien Jarno
*----------------------------------------------------------------------------*/
2510 8229c991 Aurelien Jarno
2511 8229c991 Aurelien Jarno
static const float64 float32_exp2_coefficients[15] =
2512 8229c991 Aurelien Jarno
{
2513 d5138cf4 Peter Maydell
    const_float64( 0x3ff0000000000000ll ), /*  1 */
2514 d5138cf4 Peter Maydell
    const_float64( 0x3fe0000000000000ll ), /*  2 */
2515 d5138cf4 Peter Maydell
    const_float64( 0x3fc5555555555555ll ), /*  3 */
2516 d5138cf4 Peter Maydell
    const_float64( 0x3fa5555555555555ll ), /*  4 */
2517 d5138cf4 Peter Maydell
    const_float64( 0x3f81111111111111ll ), /*  5 */
2518 d5138cf4 Peter Maydell
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
2519 d5138cf4 Peter Maydell
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
2520 d5138cf4 Peter Maydell
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
2521 d5138cf4 Peter Maydell
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
2522 d5138cf4 Peter Maydell
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2523 d5138cf4 Peter Maydell
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2524 d5138cf4 Peter Maydell
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2525 d5138cf4 Peter Maydell
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
2526 d5138cf4 Peter Maydell
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2527 d5138cf4 Peter Maydell
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2528 8229c991 Aurelien Jarno
};
2529 8229c991 Aurelien Jarno
2530 8229c991 Aurelien Jarno
float32 float32_exp2( float32 a STATUS_PARAM )
{
    flag aSign;
    int_fast16_t aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    /* Number of series terms, taken from the coefficient table itself. */
    const int nTerms = (int) ( sizeof( float32_exp2_coefficients )
                               / sizeof( float32_exp2_coefficients[0] ) );

    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF ) {
        /* NaN propagates; 2^-inf is zero and 2^+inf is +inf. */
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
        return (aSign) ? float32_zero : a;
    }
    if ( ( aExp == 0 ) && ( aSig == 0 ) ) {
        /* 2^0 is exactly one; the inexact flag is not raised. */
        return float32_one;
    }

    /* Every other input is reported as inexact. */
    float_raise( float_flag_inexact STATUS_VAR);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2^a == e^(a * ln 2): evaluate the Taylor series of e^x in float64. */
    x = float32_to_float64(a STATUS_VAR);
    x = float64_mul(x, float64_ln2 STATUS_VAR);

    xn = x;             /* running power x^(i+1) */
    r = float64_one;    /* running sum, starting with the constant term */
    for (i = 0 ; i < nTerms ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
        r = float64_add(r, f STATUS_VAR);

        xn = float64_mul(xn, x STATUS_VAR);
    }

    /* Use STATUS_VAR like every other call, not the bare parameter name. */
    return float64_to_float32(r STATUS_VAR);
}
2572 8229c991 Aurelien Jarno
2573 8229c991 Aurelien Jarno
/*----------------------------------------------------------------------------
2574 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2575 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2576 374dfc33 aurel32
| Floating-Point Arithmetic.
2577 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2578 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
{
    flag aSign, zSign;
    int_fast16_t aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    /*
     * NaNs are handled first, whatever their sign bit: a NaN operand is
     * propagated rather than being treated as a negative number (which
     * would raise invalid and discard the payload).
     */
    if ( ( aExp == 0xFF ) && aSig ) {
        return propagateFloat32NaN( a, float32_zero STATUS_VAR );
    }
    if ( aExp == 0 ) {
        /* log2 of either zero is -infinity.  NOTE(review): no exception
         * flag is raised on this path. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* Negative non-zero input, including -infinity: invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if ( aExp == 0xFF ) {
        /* +infinity is returned unchanged. */
        return a;
    }

    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    /* Shift as unsigned: left-shifting a negative signed value is undefined
     * behaviour in C.  The resulting 32-bit pattern is the two's-complement
     * one. */
    zSig = (uint32_t) aExp << 23;

    /*
     * Produce the 23 fraction bits one at a time by repeated squaring of
     * the significand; each time the square reaches 2.0 it is halved and
     * the corresponding result bit is set.
     */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Convert the two's-complement fixed-point value to sign/magnitude. */
    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
}
2620 374dfc33 aurel32
2621 374dfc33 aurel32
/*----------------------------------------------------------------------------
2622 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2623 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2624 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2625 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2626 158142c2 bellard
*----------------------------------------------------------------------------*/
2627 158142c2 bellard
2628 b689362d Aurelien Jarno
int float32_eq(float32 a, float32 b STATUS_PARAM)
{
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Any NaN operand makes the comparison false and raises invalid. */
    if (((extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a)) ||
        ((extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aBits = float32_val(a);
    bBits = float32_val(b);
    if (aBits == bBits) {
        return 1;
    }
    /* Differing bit patterns are still equal when both are zeroes
     * (everything except the sign bit is clear). */
    return (uint32_t) ((aBits | bBits) << 1) == 0;
}
2644 158142c2 bellard
2645 158142c2 bellard
/*----------------------------------------------------------------------------
2646 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2647 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
2648 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
2649 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2650 158142c2 bellard
*----------------------------------------------------------------------------*/
2651 158142c2 bellard
2652 750afe93 bellard
int float32_le(float32 a, float32 b STATUS_PARAM)
{
    flag aSign, bSign;
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Any NaN operand makes the comparison false and raises invalid. */
    if (((extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a)) ||
        ((extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    aBits = float32_val(a);
    bBits = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true when `a' is the negative one, or when both
         * operands are zeroes. */
        return aSign || ((uint32_t) ((aBits | bBits) << 1) == 0);
    }
    if (aBits == bBits) {
        return 1;
    }
    /* Same sign: the raw bit-pattern ordering is reversed for negatives. */
    return (aSign ^ (aBits < bBits)) != 0;
}
2673 158142c2 bellard
2674 158142c2 bellard
/*----------------------------------------------------------------------------
2675 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2676 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2677 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
2678 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2679 158142c2 bellard
*----------------------------------------------------------------------------*/
2680 158142c2 bellard
2681 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2682 158142c2 bellard
{
2683 158142c2 bellard
    flag aSign, bSign;
2684 bb98fe42 Andreas Färber
    uint32_t av, bv;
2685 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2686 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2687 158142c2 bellard
2688 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2689 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2690 158142c2 bellard
       ) {
2691 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2692 158142c2 bellard
        return 0;
2693 158142c2 bellard
    }
2694 158142c2 bellard
    aSign = extractFloat32Sign( a );
2695 158142c2 bellard
    bSign = extractFloat32Sign( b );
2696 f090c9d4 pbrook
    av = float32_val(a);
2697 f090c9d4 pbrook
    bv = float32_val(b);
2698 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2699 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2700 158142c2 bellard
2701 158142c2 bellard
}
2702 158142c2 bellard
2703 158142c2 bellard
/*----------------------------------------------------------------------------
2704 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2705 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
2706 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
2707 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
2708 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2709 67b7861d Aurelien Jarno
2710 67b7861d Aurelien Jarno
int float32_unordered( float32 a, float32 b STATUS_PARAM )
2711 67b7861d Aurelien Jarno
{
2712 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2713 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2714 67b7861d Aurelien Jarno
2715 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2716 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2717 67b7861d Aurelien Jarno
       ) {
2718 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2719 67b7861d Aurelien Jarno
        return 1;
2720 67b7861d Aurelien Jarno
    }
2721 67b7861d Aurelien Jarno
    return 0;
2722 67b7861d Aurelien Jarno
}
2723 b689362d Aurelien Jarno
2724 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2725 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2726 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2727 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
2728 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
2729 158142c2 bellard
*----------------------------------------------------------------------------*/
2730 158142c2 bellard
2731 b689362d Aurelien Jarno
int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
2732 158142c2 bellard
{
2733 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2734 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2735 158142c2 bellard
2736 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2737 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2738 158142c2 bellard
       ) {
2739 b689362d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2740 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2741 b689362d Aurelien Jarno
        }
2742 158142c2 bellard
        return 0;
2743 158142c2 bellard
    }
2744 b689362d Aurelien Jarno
    return ( float32_val(a) == float32_val(b) ) ||
2745 b689362d Aurelien Jarno
            ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2746 158142c2 bellard
}
2747 158142c2 bellard
2748 158142c2 bellard
/*----------------------------------------------------------------------------
2749 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2750 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2751 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2752 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2753 158142c2 bellard
*----------------------------------------------------------------------------*/
2754 158142c2 bellard
2755 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2756 158142c2 bellard
{
2757 158142c2 bellard
    flag aSign, bSign;
2758 bb98fe42 Andreas Färber
    uint32_t av, bv;
2759 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2760 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2761 158142c2 bellard
2762 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2763 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2764 158142c2 bellard
       ) {
2765 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2766 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2767 158142c2 bellard
        }
2768 158142c2 bellard
        return 0;
2769 158142c2 bellard
    }
2770 158142c2 bellard
    aSign = extractFloat32Sign( a );
2771 158142c2 bellard
    bSign = extractFloat32Sign( b );
2772 f090c9d4 pbrook
    av = float32_val(a);
2773 f090c9d4 pbrook
    bv = float32_val(b);
2774 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2775 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2776 158142c2 bellard
2777 158142c2 bellard
}
2778 158142c2 bellard
2779 158142c2 bellard
/*----------------------------------------------------------------------------
2780 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2781 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2782 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2783 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2784 158142c2 bellard
*----------------------------------------------------------------------------*/
2785 158142c2 bellard
2786 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2787 158142c2 bellard
{
2788 158142c2 bellard
    flag aSign, bSign;
2789 bb98fe42 Andreas Färber
    uint32_t av, bv;
2790 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2791 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2792 158142c2 bellard
2793 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2794 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2795 158142c2 bellard
       ) {
2796 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2797 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2798 158142c2 bellard
        }
2799 158142c2 bellard
        return 0;
2800 158142c2 bellard
    }
2801 158142c2 bellard
    aSign = extractFloat32Sign( a );
2802 158142c2 bellard
    bSign = extractFloat32Sign( b );
2803 f090c9d4 pbrook
    av = float32_val(a);
2804 f090c9d4 pbrook
    bv = float32_val(b);
2805 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2806 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2807 158142c2 bellard
2808 158142c2 bellard
}
2809 158142c2 bellard
2810 158142c2 bellard
/*----------------------------------------------------------------------------
2811 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2812 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
2813 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
2814 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
2815 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2816 67b7861d Aurelien Jarno
2817 67b7861d Aurelien Jarno
int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2818 67b7861d Aurelien Jarno
{
2819 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2820 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2821 67b7861d Aurelien Jarno
2822 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2823 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2824 67b7861d Aurelien Jarno
       ) {
2825 67b7861d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2826 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2827 67b7861d Aurelien Jarno
        }
2828 67b7861d Aurelien Jarno
        return 1;
2829 67b7861d Aurelien Jarno
    }
2830 67b7861d Aurelien Jarno
    return 0;
2831 67b7861d Aurelien Jarno
}
2832 67b7861d Aurelien Jarno
2833 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2834 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2835 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2836 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2837 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2838 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2839 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2840 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2841 158142c2 bellard
*----------------------------------------------------------------------------*/
2842 158142c2 bellard
2843 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2844 158142c2 bellard
{
2845 158142c2 bellard
    flag aSign;
2846 94a49d86 Andreas Färber
    int_fast16_t aExp, shiftCount;
2847 bb98fe42 Andreas Färber
    uint64_t aSig;
2848 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2849 158142c2 bellard
2850 158142c2 bellard
    aSig = extractFloat64Frac( a );
2851 158142c2 bellard
    aExp = extractFloat64Exp( a );
2852 158142c2 bellard
    aSign = extractFloat64Sign( a );
2853 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2854 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2855 158142c2 bellard
    shiftCount = 0x42C - aExp;
2856 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2857 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2858 158142c2 bellard
2859 158142c2 bellard
}
2860 158142c2 bellard
2861 158142c2 bellard
/*----------------------------------------------------------------------------
2862 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2863 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2864 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2865 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2866 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2867 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2868 158142c2 bellard
| returned.
2869 158142c2 bellard
*----------------------------------------------------------------------------*/
2870 158142c2 bellard
2871 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2872 158142c2 bellard
{
2873 158142c2 bellard
    flag aSign;
2874 94a49d86 Andreas Färber
    int_fast16_t aExp, shiftCount;
2875 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
2876 b3a6a2e0 Peter Maydell
    int32_t z;
2877 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2878 158142c2 bellard
2879 158142c2 bellard
    aSig = extractFloat64Frac( a );
2880 158142c2 bellard
    aExp = extractFloat64Exp( a );
2881 158142c2 bellard
    aSign = extractFloat64Sign( a );
2882 158142c2 bellard
    if ( 0x41E < aExp ) {
2883 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2884 158142c2 bellard
        goto invalid;
2885 158142c2 bellard
    }
2886 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2887 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2888 158142c2 bellard
        return 0;
2889 158142c2 bellard
    }
2890 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2891 158142c2 bellard
    shiftCount = 0x433 - aExp;
2892 158142c2 bellard
    savedASig = aSig;
2893 158142c2 bellard
    aSig >>= shiftCount;
2894 158142c2 bellard
    z = aSig;
2895 158142c2 bellard
    if ( aSign ) z = - z;
2896 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2897 158142c2 bellard
 invalid:
2898 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2899 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2900 158142c2 bellard
    }
2901 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2902 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2903 158142c2 bellard
    }
2904 158142c2 bellard
    return z;
2905 158142c2 bellard
2906 158142c2 bellard
}
2907 158142c2 bellard
2908 158142c2 bellard
/*----------------------------------------------------------------------------
2909 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2910 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
2911 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2912 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
2913 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2914 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
2915 cbcef455 Peter Maydell
| returned.
2916 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
2917 cbcef455 Peter Maydell
2918 94a49d86 Andreas Färber
int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
2919 cbcef455 Peter Maydell
{
2920 cbcef455 Peter Maydell
    flag aSign;
2921 94a49d86 Andreas Färber
    int_fast16_t aExp, shiftCount;
2922 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
2923 cbcef455 Peter Maydell
    int32 z;
2924 cbcef455 Peter Maydell
2925 cbcef455 Peter Maydell
    aSig = extractFloat64Frac( a );
2926 cbcef455 Peter Maydell
    aExp = extractFloat64Exp( a );
2927 cbcef455 Peter Maydell
    aSign = extractFloat64Sign( a );
2928 cbcef455 Peter Maydell
    if ( 0x40E < aExp ) {
2929 cbcef455 Peter Maydell
        if ( ( aExp == 0x7FF ) && aSig ) {
2930 cbcef455 Peter Maydell
            aSign = 0;
2931 cbcef455 Peter Maydell
        }
2932 cbcef455 Peter Maydell
        goto invalid;
2933 cbcef455 Peter Maydell
    }
2934 cbcef455 Peter Maydell
    else if ( aExp < 0x3FF ) {
2935 cbcef455 Peter Maydell
        if ( aExp || aSig ) {
2936 cbcef455 Peter Maydell
            STATUS(float_exception_flags) |= float_flag_inexact;
2937 cbcef455 Peter Maydell
        }
2938 cbcef455 Peter Maydell
        return 0;
2939 cbcef455 Peter Maydell
    }
2940 cbcef455 Peter Maydell
    aSig |= LIT64( 0x0010000000000000 );
2941 cbcef455 Peter Maydell
    shiftCount = 0x433 - aExp;
2942 cbcef455 Peter Maydell
    savedASig = aSig;
2943 cbcef455 Peter Maydell
    aSig >>= shiftCount;
2944 cbcef455 Peter Maydell
    z = aSig;
2945 cbcef455 Peter Maydell
    if ( aSign ) {
2946 cbcef455 Peter Maydell
        z = - z;
2947 cbcef455 Peter Maydell
    }
2948 cbcef455 Peter Maydell
    if ( ( (int16_t)z < 0 ) ^ aSign ) {
2949 cbcef455 Peter Maydell
 invalid:
2950 cbcef455 Peter Maydell
        float_raise( float_flag_invalid STATUS_VAR);
2951 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
2952 cbcef455 Peter Maydell
    }
2953 cbcef455 Peter Maydell
    if ( ( aSig<<shiftCount ) != savedASig ) {
2954 cbcef455 Peter Maydell
        STATUS(float_exception_flags) |= float_flag_inexact;
2955 cbcef455 Peter Maydell
    }
2956 cbcef455 Peter Maydell
    return z;
2957 cbcef455 Peter Maydell
}
2958 cbcef455 Peter Maydell
2959 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
2960 cbcef455 Peter Maydell
| Returns the result of converting the double-precision floating-point value
2961 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2962 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2963 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2964 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2965 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2966 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2967 158142c2 bellard
*----------------------------------------------------------------------------*/
2968 158142c2 bellard
2969 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2970 158142c2 bellard
{
2971 158142c2 bellard
    flag aSign;
2972 94a49d86 Andreas Färber
    int_fast16_t aExp, shiftCount;
2973 bb98fe42 Andreas Färber
    uint64_t aSig, aSigExtra;
2974 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2975 158142c2 bellard
2976 158142c2 bellard
    aSig = extractFloat64Frac( a );
2977 158142c2 bellard
    aExp = extractFloat64Exp( a );
2978 158142c2 bellard
    aSign = extractFloat64Sign( a );
2979 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2980 158142c2 bellard
    shiftCount = 0x433 - aExp;
2981 158142c2 bellard
    if ( shiftCount <= 0 ) {
2982 158142c2 bellard
        if ( 0x43E < aExp ) {
2983 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2984 158142c2 bellard
            if (    ! aSign
2985 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2986 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2987 158142c2 bellard
               ) {
2988 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2989 158142c2 bellard
            }
2990 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
2991 158142c2 bellard
        }
2992 158142c2 bellard
        aSigExtra = 0;
2993 158142c2 bellard
        aSig <<= - shiftCount;
2994 158142c2 bellard
    }
2995 158142c2 bellard
    else {
2996 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2997 158142c2 bellard
    }
2998 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2999 158142c2 bellard
3000 158142c2 bellard
}
3001 158142c2 bellard
3002 158142c2 bellard
/*----------------------------------------------------------------------------
3003 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3004 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
3005 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3006 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
3007 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
3008 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
3009 158142c2 bellard
| returned.
3010 158142c2 bellard
*----------------------------------------------------------------------------*/
3011 158142c2 bellard
3012 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
3013 158142c2 bellard
{
3014 158142c2 bellard
    flag aSign;
3015 94a49d86 Andreas Färber
    int_fast16_t aExp, shiftCount;
3016 bb98fe42 Andreas Färber
    uint64_t aSig;
3017 158142c2 bellard
    int64 z;
3018 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3019 158142c2 bellard
3020 158142c2 bellard
    aSig = extractFloat64Frac( a );
3021 158142c2 bellard
    aExp = extractFloat64Exp( a );
3022 158142c2 bellard
    aSign = extractFloat64Sign( a );
3023 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3024 158142c2 bellard
    shiftCount = aExp - 0x433;
3025 158142c2 bellard
    if ( 0 <= shiftCount ) {
3026 158142c2 bellard
        if ( 0x43E <= aExp ) {
3027 f090c9d4 pbrook
            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
3028 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3029 158142c2 bellard
                if (    ! aSign
3030 158142c2 bellard
                     || (    ( aExp == 0x7FF )
3031 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
3032 158142c2 bellard
                   ) {
3033 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
3034 158142c2 bellard
                }
3035 158142c2 bellard
            }
3036 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
3037 158142c2 bellard
        }
3038 158142c2 bellard
        z = aSig<<shiftCount;
3039 158142c2 bellard
    }
3040 158142c2 bellard
    else {
3041 158142c2 bellard
        if ( aExp < 0x3FE ) {
3042 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3043 158142c2 bellard
            return 0;
3044 158142c2 bellard
        }
3045 158142c2 bellard
        z = aSig>>( - shiftCount );
3046 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3047 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
3048 158142c2 bellard
        }
3049 158142c2 bellard
    }
3050 158142c2 bellard
    if ( aSign ) z = - z;
3051 158142c2 bellard
    return z;
3052 158142c2 bellard
3053 158142c2 bellard
}
3054 158142c2 bellard
3055 158142c2 bellard
/*----------------------------------------------------------------------------
3056 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3057 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
3058 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3059 158142c2 bellard
| Arithmetic.
3060 158142c2 bellard
*----------------------------------------------------------------------------*/
3061 158142c2 bellard
3062 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
3063 158142c2 bellard
{
3064 158142c2 bellard
    flag aSign;
3065 94a49d86 Andreas Färber
    int_fast16_t aExp;
3066 bb98fe42 Andreas Färber
    uint64_t aSig;
3067 bb98fe42 Andreas Färber
    uint32_t zSig;
3068 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3069 158142c2 bellard
3070 158142c2 bellard
    aSig = extractFloat64Frac( a );
3071 158142c2 bellard
    aExp = extractFloat64Exp( a );
3072 158142c2 bellard
    aSign = extractFloat64Sign( a );
3073 158142c2 bellard
    if ( aExp == 0x7FF ) {
3074 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3075 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
3076 158142c2 bellard
    }
3077 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
3078 158142c2 bellard
    zSig = aSig;
3079 158142c2 bellard
    if ( aExp || zSig ) {
3080 158142c2 bellard
        zSig |= 0x40000000;
3081 158142c2 bellard
        aExp -= 0x381;
3082 158142c2 bellard
    }
3083 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
3084 158142c2 bellard
3085 158142c2 bellard
}
3086 158142c2 bellard
3087 60011498 Paul Brook
3088 60011498 Paul Brook
/*----------------------------------------------------------------------------
3089 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3090 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
3091 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
3092 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
3093 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
3094 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
3095 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
3096 60011498 Paul Brook
| significand.
3097 60011498 Paul Brook
*----------------------------------------------------------------------------*/
3098 94a49d86 Andreas Färber
static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
3099 60011498 Paul Brook
{
3100 bb4d4bb3 Peter Maydell
    return make_float16(
3101 bb98fe42 Andreas Färber
        (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3102 60011498 Paul Brook
}
3103 60011498 Paul Brook
3104 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3105 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
3106 bb4d4bb3 Peter Maydell
3107 bb4d4bb3 Peter Maydell
float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
3108 60011498 Paul Brook
{
3109 60011498 Paul Brook
    flag aSign;
3110 94a49d86 Andreas Färber
    int_fast16_t aExp;
3111 bb98fe42 Andreas Färber
    uint32_t aSig;
3112 60011498 Paul Brook
3113 bb4d4bb3 Peter Maydell
    aSign = extractFloat16Sign(a);
3114 bb4d4bb3 Peter Maydell
    aExp = extractFloat16Exp(a);
3115 bb4d4bb3 Peter Maydell
    aSig = extractFloat16Frac(a);
3116 60011498 Paul Brook
3117 60011498 Paul Brook
    if (aExp == 0x1f && ieee) {
3118 60011498 Paul Brook
        if (aSig) {
3119 f591e1be Peter Maydell
            return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
3120 60011498 Paul Brook
        }
3121 4be8eeac Peter Maydell
        return packFloat32(aSign, 0xff, 0);
3122 60011498 Paul Brook
    }
3123 60011498 Paul Brook
    if (aExp == 0) {
3124 60011498 Paul Brook
        int8 shiftCount;
3125 60011498 Paul Brook
3126 60011498 Paul Brook
        if (aSig == 0) {
3127 60011498 Paul Brook
            return packFloat32(aSign, 0, 0);
3128 60011498 Paul Brook
        }
3129 60011498 Paul Brook
3130 60011498 Paul Brook
        shiftCount = countLeadingZeros32( aSig ) - 21;
3131 60011498 Paul Brook
        aSig = aSig << shiftCount;
3132 60011498 Paul Brook
        aExp = -shiftCount;
3133 60011498 Paul Brook
    }
3134 60011498 Paul Brook
    return packFloat32( aSign, aExp + 0x70, aSig << 13);
3135 60011498 Paul Brook
}
3136 60011498 Paul Brook
3137 bb4d4bb3 Peter Maydell
/* Converts the single-precision value `a' to half precision, rounding
 * according to the current rounding mode.  `ieee' selects the standard IEEE
 * half-precision format; when it is zero the ARM alternative format (no
 * NaN/Inf encodings, one extra exponent value) is produced instead.
 */
float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp;
    uint32_t aSig;
    uint32_t lostBits;
    uint32_t roundIncrement;
    int maxexp = ieee ? 15 : 16;
    bool rounding_bumps_exp;
    bool is_tiny = false;

    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac(a);
    aExp = extractFloat32Exp(a);
    aSign = extractFloat32Sign(a);

    if (aExp == 0xFF) {
        if (aSig) {
            /* NaN input */
            if (ieee) {
                return commonNaNToFloat16(
                    float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
            }
            /* No NaN encoding in the alternative format: flag invalid and
             * hand back a signed zero.
             */
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0, 0);
        }
        /* Infinite input */
        if (ieee) {
            return packFloat16(aSign, 0x1f, 0);
        }
        /* No infinity in the alternative format: flag invalid and return
         * the all-ones exponent/fraction pattern.
         */
        float_raise(float_flag_invalid STATUS_VAR);
        return packFloat16(aSign, 0x1f, 0x3ff);
    }

    if (aSig == 0 && aExp == 0) {
        return packFloat16(aSign, 0, 0);
    }

    /* From here on the binary point sits between bits 22 and 23.  The
     * implicit one is OR-ed in unconditionally, even for a denormal input.
     * That is harmless: every single-precision denormal is smaller than the
     * smallest half-precision denormal, so such inputs leave through the
     * "return zero" exit below and aSig is never used.
     */
    aSig |= 0x00800000;
    aExp -= 0x7f;

    /* Work out which significand bits cannot be kept in half precision. */
    if (aExp >= -14) {
        /* Result is a normal half-precision number. */
        lostBits = 0x00001fff;
    } else if (aExp >= -24) {
        /* Result is a half-precision denormal with some bits retained. */
        lostBits = (uint32_t)0x00ffffff >> (25 + aExp);
    } else {
        /* Below the denormal range: every bit is lost. */
        lostBits = 0x00ffffff;
    }

    switch (STATUS(float_rounding_mode)) {
    case float_round_nearest_even:
        roundIncrement = (lostBits + 1) >> 1;
        if ((aSig & lostBits) == roundIncrement) {
            /* Exact tie: round up only if the retained LSB is odd. */
            roundIncrement = aSig & (roundIncrement << 1);
        }
        break;
    case float_round_up:
        roundIncrement = aSign ? 0 : lostBits;
        break;
    case float_round_down:
        roundIncrement = aSign ? lostBits : 0;
        break;
    default: /* round_to_zero */
        roundIncrement = 0;
        break;
    }

    rounding_bumps_exp = (aSig + roundIncrement >= 0x01000000);

    if (aExp > maxexp || (aExp == maxexp && rounding_bumps_exp)) {
        /* Result does not fit.
         * NOTE(review): the IEEE branch returns infinity in every rounding
         * mode -- confirm this is the intended behaviour for directed
         * rounding.
         */
        if (!ieee) {
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
        return packFloat16(aSign, 0x1f, 0);
    }

    if (aExp < -14) {
        /* Note that flush-to-zero does not affect half-precision results */
        is_tiny =
            (STATUS(float_detect_tininess) == float_tininess_before_rounding)
            || (aExp < -15)
            || (!rounding_bumps_exp);
    }
    if (aSig & lostBits) {
        float_raise(float_flag_inexact STATUS_VAR);
        if (is_tiny) {
            float_raise(float_flag_underflow STATUS_VAR);
        }
    }

    aSig += roundIncrement;
    if (rounding_bumps_exp) {
        /* Rounding carried out of bit 23: renormalise. */
        aSig >>= 1;
        aExp++;
    }

    if (aExp < -24) {
        /* NOTE(review): this exit is also taken in round-up/round-down
         * modes for inputs below 2^-25, yielding zero rather than the
         * smallest denormal -- confirm intended.
         */
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        /* Denormal result: shift the significand down to exponent -14. */
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    /* Bit 23 of aSig (when set) lands on bit 10 and bumps the packed
     * exponent by one, hence the bias of 14 rather than 15.
     */
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
3257 60011498 Paul Brook
3258 158142c2 bellard
/*----------------------------------------------------------------------------
3259 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3260 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
3261 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3262 158142c2 bellard
| Arithmetic.
3263 158142c2 bellard
*----------------------------------------------------------------------------*/
3264 158142c2 bellard
3265 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
3266 158142c2 bellard
{
3267 158142c2 bellard
    flag aSign;
3268 94a49d86 Andreas Färber
    int_fast16_t aExp;
3269 bb98fe42 Andreas Färber
    uint64_t aSig;
3270 158142c2 bellard
3271 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3272 158142c2 bellard
    aSig = extractFloat64Frac( a );
3273 158142c2 bellard
    aExp = extractFloat64Exp( a );
3274 158142c2 bellard
    aSign = extractFloat64Sign( a );
3275 158142c2 bellard
    if ( aExp == 0x7FF ) {
3276 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3277 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3278 158142c2 bellard
    }
3279 158142c2 bellard
    if ( aExp == 0 ) {
3280 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3281 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3282 158142c2 bellard
    }
3283 158142c2 bellard
    return
3284 158142c2 bellard
        packFloatx80(
3285 158142c2 bellard
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3286 158142c2 bellard
3287 158142c2 bellard
}
3288 158142c2 bellard
3289 158142c2 bellard
/*----------------------------------------------------------------------------
3290 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3291 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
3292 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3293 158142c2 bellard
| Arithmetic.
3294 158142c2 bellard
*----------------------------------------------------------------------------*/
3295 158142c2 bellard
3296 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
3297 158142c2 bellard
{
3298 158142c2 bellard
    flag aSign;
3299 94a49d86 Andreas Färber
    int_fast16_t aExp;
3300 bb98fe42 Andreas Färber
    uint64_t aSig, zSig0, zSig1;
3301 158142c2 bellard
3302 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3303 158142c2 bellard
    aSig = extractFloat64Frac( a );
3304 158142c2 bellard
    aExp = extractFloat64Exp( a );
3305 158142c2 bellard
    aSign = extractFloat64Sign( a );
3306 158142c2 bellard
    if ( aExp == 0x7FF ) {
3307 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3308 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
3309 158142c2 bellard
    }
3310 158142c2 bellard
    if ( aExp == 0 ) {
3311 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3312 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3313 158142c2 bellard
        --aExp;
3314 158142c2 bellard
    }
3315 158142c2 bellard
    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3316 158142c2 bellard
    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3317 158142c2 bellard
3318 158142c2 bellard
}
3319 158142c2 bellard
3320 158142c2 bellard
/*----------------------------------------------------------------------------
3321 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
3322 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
3323 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
3324 158142c2 bellard
| Floating-Point Arithmetic.
3325 158142c2 bellard
*----------------------------------------------------------------------------*/
3326 158142c2 bellard
3327 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
3328 158142c2 bellard
{
3329 158142c2 bellard
    flag aSign;
3330 94a49d86 Andreas Färber
    int_fast16_t aExp;
3331 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
3332 158142c2 bellard
    int8 roundingMode;
3333 bb98fe42 Andreas Färber
    uint64_t z;
3334 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3335 158142c2 bellard
3336 158142c2 bellard
    aExp = extractFloat64Exp( a );
3337 158142c2 bellard
    if ( 0x433 <= aExp ) {
3338 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3339 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
3340 158142c2 bellard
        }
3341 158142c2 bellard
        return a;
3342 158142c2 bellard
    }
3343 158142c2 bellard
    if ( aExp < 0x3FF ) {
3344 bb98fe42 Andreas Färber
        if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3345 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3346 158142c2 bellard
        aSign = extractFloat64Sign( a );
3347 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3348 158142c2 bellard
         case float_round_nearest_even:
3349 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3350 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
3351 158142c2 bellard
            }
3352 158142c2 bellard
            break;
3353 158142c2 bellard
         case float_round_down:
3354 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3355 158142c2 bellard
         case float_round_up:
3356 f090c9d4 pbrook
            return make_float64(
3357 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3358 158142c2 bellard
        }
3359 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
3360 158142c2 bellard
    }
3361 158142c2 bellard
    lastBitMask = 1;
3362 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
3363 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3364 f090c9d4 pbrook
    z = float64_val(a);
3365 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3366 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3367 158142c2 bellard
        z += lastBitMask>>1;
3368 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3369 158142c2 bellard
    }
3370 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3371 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
3372 158142c2 bellard
            z += roundBitsMask;
3373 158142c2 bellard
        }
3374 158142c2 bellard
    }
3375 158142c2 bellard
    z &= ~ roundBitsMask;
3376 f090c9d4 pbrook
    if ( z != float64_val(a) )
3377 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
3378 f090c9d4 pbrook
    return make_float64(z);
3379 158142c2 bellard
3380 158142c2 bellard
}
3381 158142c2 bellard
3382 e6e5906b pbrook
/* Rounds `a' to an integral value toward zero, regardless of the current
 * rounding mode.  The mode is switched to round-to-zero around the call to
 * float64_round_to_int() and restored before returning.
 */
float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode = STATUS(float_rounding_mode);
    float64 truncated;

    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;

    return truncated;
}
3392 e6e5906b pbrook
3393 158142c2 bellard
/*----------------------------------------------------------------------------
3394 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
3395 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
3396 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
3397 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3398 158142c2 bellard
| Floating-Point Arithmetic.
3399 158142c2 bellard
*----------------------------------------------------------------------------*/
3400 158142c2 bellard
3401 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3402 158142c2 bellard
{
3403 94a49d86 Andreas Färber
    int_fast16_t aExp, bExp, zExp;
3404 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3405 94a49d86 Andreas Färber
    int_fast16_t expDiff;
3406 158142c2 bellard
3407 158142c2 bellard
    aSig = extractFloat64Frac( a );
3408 158142c2 bellard
    aExp = extractFloat64Exp( a );
3409 158142c2 bellard
    bSig = extractFloat64Frac( b );
3410 158142c2 bellard
    bExp = extractFloat64Exp( b );
3411 158142c2 bellard
    expDiff = aExp - bExp;
3412 158142c2 bellard
    aSig <<= 9;
3413 158142c2 bellard
    bSig <<= 9;
3414 158142c2 bellard
    if ( 0 < expDiff ) {
3415 158142c2 bellard
        if ( aExp == 0x7FF ) {
3416 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3417 158142c2 bellard
            return a;
3418 158142c2 bellard
        }
3419 158142c2 bellard
        if ( bExp == 0 ) {
3420 158142c2 bellard
            --expDiff;
3421 158142c2 bellard
        }
3422 158142c2 bellard
        else {
3423 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
3424 158142c2 bellard
        }
3425 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
3426 158142c2 bellard
        zExp = aExp;
3427 158142c2 bellard
    }
3428 158142c2 bellard
    else if ( expDiff < 0 ) {
3429 158142c2 bellard
        if ( bExp == 0x7FF ) {
3430 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3431 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3432 158142c2 bellard
        }
3433 158142c2 bellard
        if ( aExp == 0 ) {
3434 158142c2 bellard
            ++expDiff;
3435 158142c2 bellard
        }
3436 158142c2 bellard
        else {
3437 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
3438 158142c2 bellard
        }
3439 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
3440 158142c2 bellard
        zExp = bExp;
3441 158142c2 bellard
    }
3442 158142c2 bellard
    else {
3443 158142c2 bellard
        if ( aExp == 0x7FF ) {
3444 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3445 158142c2 bellard
            return a;
3446 158142c2 bellard
        }
3447 fe76d976 pbrook
        if ( aExp == 0 ) {
3448 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
3449 e6afc87f Peter Maydell
                if (aSig | bSig) {
3450 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
3451 e6afc87f Peter Maydell
                }
3452 e6afc87f Peter Maydell
                return packFloat64(zSign, 0, 0);
3453 e6afc87f Peter Maydell
            }
3454 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3455 fe76d976 pbrook
        }
3456 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3457 158142c2 bellard
        zExp = aExp;
3458 158142c2 bellard
        goto roundAndPack;
3459 158142c2 bellard
    }
3460 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
3461 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
3462 158142c2 bellard
    --zExp;
3463 bb98fe42 Andreas Färber
    if ( (int64_t) zSig < 0 ) {
3464 158142c2 bellard
        zSig = aSig + bSig;
3465 158142c2 bellard
        ++zExp;
3466 158142c2 bellard
    }
3467 158142c2 bellard
 roundAndPack:
3468 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3469 158142c2 bellard
3470 158142c2 bellard
}
3471 158142c2 bellard
3472 158142c2 bellard
/*----------------------------------------------------------------------------
3473 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
3474 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
3475 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3476 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3477 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3478 158142c2 bellard
*----------------------------------------------------------------------------*/
3479 158142c2 bellard
3480 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3481 158142c2 bellard
{
3482 94a49d86 Andreas Färber
    int_fast16_t aExp, bExp, zExp;
3483 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3484 94a49d86 Andreas Färber
    int_fast16_t expDiff;
3485 158142c2 bellard
3486 158142c2 bellard
    aSig = extractFloat64Frac( a );
3487 158142c2 bellard
    aExp = extractFloat64Exp( a );
3488 158142c2 bellard
    bSig = extractFloat64Frac( b );
3489 158142c2 bellard
    bExp = extractFloat64Exp( b );
3490 158142c2 bellard
    expDiff = aExp - bExp;
3491 158142c2 bellard
    aSig <<= 10;
3492 158142c2 bellard
    bSig <<= 10;
3493 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3494 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3495 158142c2 bellard
    if ( aExp == 0x7FF ) {
3496 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3497 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3498 158142c2 bellard
        return float64_default_nan;
3499 158142c2 bellard
    }
3500 158142c2 bellard
    if ( aExp == 0 ) {
3501 158142c2 bellard
        aExp = 1;
3502 158142c2 bellard
        bExp = 1;
3503 158142c2 bellard
    }
3504 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3505 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3506 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3507 158142c2 bellard
 bExpBigger:
3508 158142c2 bellard
    if ( bExp == 0x7FF ) {
3509 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3510 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
3511 158142c2 bellard
    }
3512 158142c2 bellard
    if ( aExp == 0 ) {
3513 158142c2 bellard
        ++expDiff;
3514 158142c2 bellard
    }
3515 158142c2 bellard
    else {
3516 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
3517 158142c2 bellard
    }
3518 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
3519 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
3520 158142c2 bellard
 bBigger:
3521 158142c2 bellard
    zSig = bSig - aSig;
3522 158142c2 bellard
    zExp = bExp;
3523 158142c2 bellard
    zSign ^= 1;
3524 158142c2 bellard
    goto normalizeRoundAndPack;
3525 158142c2 bellard
 aExpBigger:
3526 158142c2 bellard
    if ( aExp == 0x7FF ) {
3527 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3528 158142c2 bellard
        return a;
3529 158142c2 bellard
    }
3530 158142c2 bellard
    if ( bExp == 0 ) {
3531 158142c2 bellard
        --expDiff;
3532 158142c2 bellard
    }
3533 158142c2 bellard
    else {
3534 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
3535 158142c2 bellard
    }
3536 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
3537 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
3538 158142c2 bellard
 aBigger:
3539 158142c2 bellard
    zSig = aSig - bSig;
3540 158142c2 bellard
    zExp = aExp;
3541 158142c2 bellard
 normalizeRoundAndPack:
3542 158142c2 bellard
    --zExp;
3543 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3544 158142c2 bellard
3545 158142c2 bellard
}
3546 158142c2 bellard
3547 158142c2 bellard
/*----------------------------------------------------------------------------
3548 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
3549 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
3550 158142c2 bellard
| Binary Floating-Point Arithmetic.
3551 158142c2 bellard
*----------------------------------------------------------------------------*/
3552 158142c2 bellard
3553 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
3554 158142c2 bellard
{
3555 158142c2 bellard
    flag aSign, bSign;
3556 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3557 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3558 158142c2 bellard
3559 158142c2 bellard
    aSign = extractFloat64Sign( a );
3560 158142c2 bellard
    bSign = extractFloat64Sign( b );
3561 158142c2 bellard
    if ( aSign == bSign ) {
3562 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3563 158142c2 bellard
    }
3564 158142c2 bellard
    else {
3565 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3566 158142c2 bellard
    }
3567 158142c2 bellard
3568 158142c2 bellard
}
3569 158142c2 bellard
3570 158142c2 bellard
/*----------------------------------------------------------------------------
3571 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
3572 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3573 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3574 158142c2 bellard
*----------------------------------------------------------------------------*/
3575 158142c2 bellard
3576 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3577 158142c2 bellard
{
3578 158142c2 bellard
    flag aSign, bSign;
3579 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3580 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3581 158142c2 bellard
3582 158142c2 bellard
    aSign = extractFloat64Sign( a );
3583 158142c2 bellard
    bSign = extractFloat64Sign( b );
3584 158142c2 bellard
    if ( aSign == bSign ) {
3585 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3586 158142c2 bellard
    }
3587 158142c2 bellard
    else {
3588 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3589 158142c2 bellard
    }
3590 158142c2 bellard
3591 158142c2 bellard
}
3592 158142c2 bellard
3593 158142c2 bellard
/*----------------------------------------------------------------------------
3594 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
3595 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3596 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3597 158142c2 bellard
*----------------------------------------------------------------------------*/
3598 158142c2 bellard
3599 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3600 158142c2 bellard
{
3601 158142c2 bellard
    flag aSign, bSign, zSign;
3602 94a49d86 Andreas Färber
    int_fast16_t aExp, bExp, zExp;
3603 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
3604 158142c2 bellard
3605 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3606 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3607 37d18660 Peter Maydell
3608 158142c2 bellard
    aSig = extractFloat64Frac( a );
3609 158142c2 bellard
    aExp = extractFloat64Exp( a );
3610 158142c2 bellard
    aSign = extractFloat64Sign( a );
3611 158142c2 bellard
    bSig = extractFloat64Frac( b );
3612 158142c2 bellard
    bExp = extractFloat64Exp( b );
3613 158142c2 bellard
    bSign = extractFloat64Sign( b );
3614 158142c2 bellard
    zSign = aSign ^ bSign;
3615 158142c2 bellard
    if ( aExp == 0x7FF ) {
3616 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3617 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3618 158142c2 bellard
        }
3619 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
3620 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3621 158142c2 bellard
            return float64_default_nan;
3622 158142c2 bellard
        }
3623 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3624 158142c2 bellard
    }
3625 158142c2 bellard
    if ( bExp == 0x7FF ) {
3626 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3627 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3628 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3629 158142c2 bellard
            return float64_default_nan;
3630 158142c2 bellard
        }
3631 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3632 158142c2 bellard
    }
3633 158142c2 bellard
    if ( aExp == 0 ) {
3634 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3635 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3636 158142c2 bellard
    }
3637 158142c2 bellard
    if ( bExp == 0 ) {
3638 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3639 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3640 158142c2 bellard
    }
3641 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
3642 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3643 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3644 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3645 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
3646 bb98fe42 Andreas Färber
    if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
3647 158142c2 bellard
        zSig0 <<= 1;
3648 158142c2 bellard
        --zExp;
3649 158142c2 bellard
    }
3650 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3651 158142c2 bellard
3652 158142c2 bellard
}
3653 158142c2 bellard
3654 158142c2 bellard
/*----------------------------------------------------------------------------
3655 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
3656 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
3657 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3658 158142c2 bellard
*----------------------------------------------------------------------------*/
3659 158142c2 bellard
3660 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
{
    flag numSign, denSign, quotSign;
    int_fast16_t numExp, denExp, quotExp;
    uint64_t numSig, denSig, quotSig;
    uint64_t remHi, remLo;
    uint64_t prodHi, prodLo;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* Unpack the dividend (a) and the divisor (b). */
    numSig = extractFloat64Frac(a);
    numExp = extractFloat64Exp(a);
    numSign = extractFloat64Sign(a);
    denSig = extractFloat64Frac(b);
    denExp = extractFloat64Exp(b);
    denSign = extractFloat64Sign(b);
    quotSign = numSign ^ denSign;

    /* Dividend has the all-ones exponent: NaN or infinity. */
    if (numExp == 0x7FF) {
        if (numSig || (denExp == 0x7FF && denSig)) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        if (denExp == 0x7FF) {
            /* infinity / infinity */
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return packFloat64(quotSign, 0x7FF, 0);
    }

    /* Divisor has the all-ones exponent: NaN, or finite / infinity => 0. */
    if (denExp == 0x7FF) {
        if (denSig) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        return packFloat64(quotSign, 0, 0);
    }

    /* Divisor is zero or subnormal. */
    if (denExp == 0) {
        if (denSig == 0) {
            if (numExp == 0 && numSig == 0) {
                /* 0 / 0 */
                float_raise(float_flag_invalid STATUS_VAR);
                return float64_default_nan;
            }
            /* non-zero / 0 */
            float_raise(float_flag_divbyzero STATUS_VAR);
            return packFloat64(quotSign, 0x7FF, 0);
        }
        normalizeFloat64Subnormal(denSig, &denExp, &denSig);
    }

    /* Dividend is zero or subnormal. */
    if (numExp == 0) {
        if (numSig == 0) {
            return packFloat64(quotSign, 0, 0);
        }
        normalizeFloat64Subnormal(numSig, &numExp, &numSig);
    }

    /* Restore the implicit leading bit and left-align both significands. */
    quotExp = numExp - denExp + 0x3FD;
    numSig = (numSig | LIT64(0x0010000000000000)) << 10;
    denSig = (denSig | LIT64(0x0010000000000000)) << 11;
    if (denSig <= (numSig << 1)) {
        numSig >>= 1;
        ++quotExp;
    }

    quotSig = estimateDiv128To64(numSig, 0, denSig);
    if ((quotSig & 0x1FF) > 2) {
        /* Low bits are away from zero: the estimate is used as it stands. */
        return roundAndPackFloat64(quotSign, quotExp, quotSig STATUS_VAR);
    }

    /* Compute the remainder for the estimate and step the quotient down
     * until the remainder is non-negative; a non-zero low remainder word
     * is folded into the sticky bit.
     */
    mul64To128(denSig, quotSig, &prodHi, &prodLo);
    sub128(numSig, 0, prodHi, prodLo, &remHi, &remLo);
    while ((int64_t) remHi < 0) {
        --quotSig;
        add128(remHi, remLo, 0, denSig, &remHi, &remLo);
    }
    if (remLo != 0) {
        quotSig |= 1;
    }
    return roundAndPackFloat64(quotSign, quotExp, quotSig STATUS_VAR);

}
3725 158142c2 bellard
3726 158142c2 bellard
/*----------------------------------------------------------------------------
3727 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3728 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3729 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3730 158142c2 bellard
*----------------------------------------------------------------------------*/
3731 158142c2 bellard
3732 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
{
    flag aSign, remSign;
    int_fast16_t aExp, bExp, expGap;
    uint64_t aSig, bSig;
    uint64_t quot, prevSig;
    int64_t sigSum;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);
    bSig = extractFloat64Frac(b);
    bExp = extractFloat64Exp(b);

    /* a is a NaN or an infinity. */
    if (aExp == 0x7FF) {
        if (aSig || ((bExp == 0x7FF) && bSig)) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* b is a NaN, or a finite value is reduced by infinity (result is a). */
    if (bExp == 0x7FF) {
        if (bSig) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        return a;
    }

    /* b is zero (invalid) or subnormal. */
    if (bExp == 0) {
        if (bSig == 0) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
    }

    /* a is zero (returned unchanged) or subnormal. */
    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
    }

    expGap = aExp - bExp;
    aSig = (aSig | LIT64(0x0010000000000000)) << 11;
    bSig = (bSig | LIT64(0x0010000000000000)) << 11;

    /* Exponent of a is at least two below that of b: a is returned as is. */
    if (expGap < -1) {
        return a;
    }
    if (expGap < 0) {
        aSig >>= 1;
    }

    quot = (bSig <= aSig);
    if (quot) {
        aSig -= bSig;
    }

    /* Consume the exponent gap 62 bits at a time. */
    for (expGap -= 64; 0 < expGap; expGap -= 62) {
        quot = estimateDiv128To64(aSig, 0, bSig);
        if (2 < quot) {
            quot -= 2;
        } else {
            quot = 0;
        }
        aSig = -((bSig >> 2) * quot);
    }
    expGap += 64;

    /* Handle whatever part of the gap is left. */
    if (0 < expGap) {
        quot = estimateDiv128To64(aSig, 0, bSig);
        if (2 < quot) {
            quot -= 2;
        } else {
            quot = 0;
        }
        quot >>= 64 - expGap;
        bSig >>= 2;
        aSig = ((aSig >> 1) << (expGap - 1)) - bSig * quot;
    } else {
        aSig >>= 2;
        bSig >>= 2;
    }

    /* Subtract b until the running value turns negative, keeping the
     * previous value around as the other candidate remainder.
     */
    do {
        prevSig = aSig;
        ++quot;
        aSig -= bSig;
    } while (0 <= (int64_t) aSig);

    /* Choose between the two candidates from the sign of their sum; on an
     * exact tie the parity of the quotient decides.
     */
    sigSum = aSig + prevSig;
    if ((sigSum < 0) || ((sigSum == 0) && (quot & 1))) {
        aSig = prevSig;
    }

    remSign = ((int64_t) aSig < 0);
    if (remSign) {
        aSig = -aSig;
    }
    return normalizeRoundAndPackFloat64(aSign ^ remSign, bExp, aSig STATUS_VAR);

}
3811 158142c2 bellard
3812 158142c2 bellard
/*----------------------------------------------------------------------------
3813 369be8f6 Peter Maydell
| Returns the result of multiplying the double-precision floating-point values
3814 369be8f6 Peter Maydell
| `a' and `b' then adding 'c', with no intermediate rounding step after the
3815 369be8f6 Peter Maydell
| multiplication.  The operation is performed according to the IEC/IEEE
3816 369be8f6 Peter Maydell
| Standard for Binary Floating-Point Arithmetic 754-2008.
3817 369be8f6 Peter Maydell
| The flags argument allows the caller to select negation of the
3818 369be8f6 Peter Maydell
| addend, the intermediate product, or the final result. (The difference
3819 369be8f6 Peter Maydell
| between this and having the caller do a separate negation is that negating
3820 369be8f6 Peter Maydell
| externally will flip the sign bit on NaNs.)
3821 369be8f6 Peter Maydell
*----------------------------------------------------------------------------*/
3822 369be8f6 Peter Maydell
3823 369be8f6 Peter Maydell
float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
{
    flag aSign, bSign, addSign, resSign;
    int_fast16_t aExp, bExp, addExp, prodExp, resExp, expGap;
    uint64_t aSig, bSig, addSig;
    flag aZero, bZero, aInf, bInf;
    flag prodInf, prodZero, prodSign;
    uint64_t prodHi, prodLo, addHi, addLo, resHi, resLo;
    int lshift;
    flag negResult, infTimesZero;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);
    c = float64_squash_input_denormal(c STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);
    bSig = extractFloat64Frac(b);
    bExp = extractFloat64Exp(b);
    bSign = extractFloat64Sign(b);
    addSig = extractFloat64Frac(c);
    addExp = extractFloat64Exp(c);
    addSign = extractFloat64Sign(c);

    /* Classify the two multiplicands. */
    aZero = (aExp == 0) && (aSig == 0);
    bZero = (bExp == 0) && (bSig == 0);
    aInf = (aExp == 0x7ff) && (aSig == 0);
    bInf = (bExp == 0x7ff) && (bSig == 0);
    infTimesZero = (aZero && bInf) || (aInf && bZero);

    /* Whether (0,inf,qnan) and (inf,0,qnan) raise InvalidOperation, and
     * which QNaN comes back, is implementation-defined; the decision is
     * delegated to the target-specific NaN selection routine.
     */
    if ((aExp == 0x7ff && aSig) ||
        (bExp == 0x7ff && bSig) ||
        (addExp == 0x7ff && addSig)) {
        return propagateFloat64MulAddNaN(a, b, c, infTimesZero STATUS_VAR);
    }

    if (infTimesZero) {
        float_raise(float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* Apply the caller-requested negations. */
    if (flags & float_muladd_negate_c) {
        addSign ^= 1;
    }
    negResult = (flags & float_muladd_negate_result) != 0;
    prodSign = aSign ^ bSign;
    if (flags & float_muladd_negate_product) {
        prodSign ^= 1;
    }

    /* No NaNs remain, so an all-ones exponent can only mean infinity. */
    prodInf = aInf || bInf;
    prodZero = aZero || bZero;

    if (addExp == 0x7ff) {
        if (prodInf && (prodSign ^ addSign)) {
            /* infinities of opposite sign cancel => InvalidOperation */
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        /* the addend's infinity wins */
        return packFloat64(addSign ^ negResult, 0x7ff, 0);
    }

    if (prodInf) {
        return packFloat64(prodSign ^ negResult, 0x7ff, 0);
    }

    if (prodZero) {
        if (addExp == 0) {
            if (addSig == 0) {
                /* Sum of two exact zeroes: like signs keep the sign,
                 * unlike signs give -0 only when rounding down.
                 */
                if (prodSign == addSign) {
                    resSign = prodSign;
                } else {
                    resSign =
                        (STATUS(float_rounding_mode) == float_round_down);
                }
                return packFloat64(resSign ^ negResult, 0, 0);
            }
            /* Exact zero plus a denormal addend */
            if (STATUS(flush_to_zero)) {
                float_raise(float_flag_output_denormal STATUS_VAR);
                return packFloat64(addSign ^ negResult, 0, 0);
            }
        }
        /* Zero product: the result is the addend itself */
        return packFloat64(addSign ^ negResult, addExp, addSig);
    }

    if (aExp == 0) {
        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
    }
    if (bExp == 0) {
        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
    }

    /* Form the product a * b exactly in 128 bits.
     *
     * 0x3fe is subtracted here (float64_mul() subtracts 0x3ff) because the
     * true exponent is wanted, not the "one-less-than" form that
     * roundAndPackFloat64() expects.
     */
    prodExp = aExp + bExp - 0x3fe;
    aSig = (aSig | LIT64(0x0010000000000000)) << 10;
    bSig = (bSig | LIT64(0x0010000000000000)) << 11;
    mul64To128(aSig, bSig, &prodHi, &prodLo);
    if ((int64_t)(prodHi << 1) >= 0) {
        shortShift128Left(prodHi, prodLo, 1, &prodHi, &prodLo);
        prodExp--;
    }

    resSign = prodSign ^ negResult;

    /* [prodHi:prodLo] now carries the product significand with its
     * explicit bit at position 126.
     */
    if (addExp == 0) {
        if (addSig == 0) {
            /* The addend is an exact zero: just round the product. */
            shift128RightJamming(prodHi, prodLo, 64, &prodHi, &prodLo);
            return roundAndPackFloat64(resSign, prodExp - 1,
                                       prodLo STATUS_VAR);
        }
        normalizeFloat64Subnormal(addSig, &addExp, &addSig);
    }

    /* Build [addHi:addLo], the addend significand, with the explicit bit
     * likewise at position 126.
     */
    addHi = (addSig << (126 - 64 - 52)) | LIT64(0x4000000000000000);
    addLo = 0;
    expGap = prodExp - addExp;

    if (prodSign == addSign) {
        /* Magnitudes add: align the smaller-exponent operand first. */
        if (expGap > 0) {
            shift128RightJamming(addHi, addLo, expGap, &addHi, &addLo);
            resExp = prodExp;
        } else {
            if (expGap < 0) {
                shift128RightJamming(prodHi, prodLo, -expGap,
                                     &prodHi, &prodLo);
            }
            resExp = addExp;
        }

        /* Sum, then bring the explicit bit back to position 126. */
        add128(prodHi, prodLo, addHi, addLo, &resHi, &resLo);
        if ((int64_t)resHi < 0) {
            shift128RightJamming(resHi, resLo, 1, &resHi, &resLo);
        } else {
            resExp--;
        }
        shift128RightJamming(resHi, resLo, 64, &resHi, &resLo);
        return roundAndPackFloat64(resSign, resExp, resLo STATUS_VAR);
    }

    /* Magnitudes subtract: take larger minus smaller and flip the result
     * sign whenever the addend is the larger one.
     */
    if (expGap > 0) {
        shift128RightJamming(addHi, addLo, expGap, &addHi, &addLo);
        sub128(prodHi, prodLo, addHi, addLo, &resHi, &resLo);
        resExp = prodExp;
    } else if (expGap < 0) {
        shift128RightJamming(prodHi, prodLo, -expGap, &prodHi, &prodLo);
        sub128(addHi, addLo, prodHi, prodLo, &resHi, &resLo);
        resExp = addExp;
        resSign ^= 1;
    } else {
        resExp = prodExp;
        if (lt128(addHi, addLo, prodHi, prodLo)) {
            sub128(prodHi, prodLo, addHi, addLo, &resHi, &resLo);
        } else if (lt128(prodHi, prodLo, addHi, addLo)) {
            sub128(addHi, addLo, prodHi, prodLo, &resHi, &resLo);
            resSign ^= 1;
        } else {
            /* Operands cancel exactly: the zero is negative only when
             * rounding down (before the optional result negation).
             */
            resSign = negResult;
            if (STATUS(float_rounding_mode) == float_round_down) {
                resSign ^= 1;
            }
            return packFloat64(resSign, 0, 0);
        }
    }
    --resExp;

    /* Equivalent of normalizeRoundAndPackFloat64(), but starting from a
     * significand held in a pair of uint64_t.
     */
    if (resHi != 0) {
        lshift = countLeadingZeros64(resHi) - 1;
        shortShift128Left(resHi, resLo, lshift, &resHi, &resLo);
        if (resLo != 0) {
            /* keep the discarded low bits as a sticky bit */
            resHi |= 1;
        }
        resExp -= lshift;
    } else {
        lshift = countLeadingZeros64(resLo);
        if (lshift == 0) {
            /* Top bit of the low word is set: shift right by one,
             * jamming the bit that falls off.
             */
            resHi = (resLo >> 1) | (resLo & 1);
            resExp -= 63;
        } else {
            lshift--;
            resHi = resLo << lshift;
            resExp -= (lshift + 64);
        }
    }
    return roundAndPackFloat64(resSign, resExp, resHi STATUS_VAR);
}
4037 369be8f6 Peter Maydell
4038 369be8f6 Peter Maydell
/*----------------------------------------------------------------------------
4039 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
4040 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
4041 158142c2 bellard
| Floating-Point Arithmetic.
4042 158142c2 bellard
*----------------------------------------------------------------------------*/
4043 158142c2 bellard
4044 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
{
    flag aSign;
    int_fast16_t aExp, rootExp;
    uint64_t aSig, rootSig, twiceRoot;
    uint64_t remHi, remLo, sqHi, sqLo;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);

    /* NaN or infinity: only +infinity has a square root. */
    if (aExp == 0x7FF) {
        if (aSig) {
            return propagateFloat64NaN(a, a STATUS_VAR);
        }
        if (aSign) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return a;
    }

    /* Negative input: a negative zero is returned as is, anything else
     * is invalid.
     */
    if (aSign) {
        if (aExp == 0 && aSig == 0) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* Positive zero or subnormal. */
    if (aExp == 0) {
        if (aSig == 0) {
            return float64_zero;
        }
        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
    }

    /* Halve the unbiased exponent, then refine a 32-bit root estimate
     * into a 64-bit one.
     */
    rootExp = ((aExp - 0x3FF) >> 1) + 0x3FE;
    aSig |= LIT64(0x0010000000000000);
    rootSig = estimateSqrt32(aExp, aSig >> 21);
    aSig <<= 9 - (aExp & 1);
    rootSig = estimateDiv128To64(aSig, 0, rootSig << 32) + (rootSig << 30);

    if ((rootSig & 0x1FF) > 5) {
        /* Low bits are away from zero: the estimate is used as it stands. */
        return roundAndPackFloat64(0, rootExp, rootSig STATUS_VAR);
    }

    /* Compute the remainder a - root^2 and step the root down until the
     * remainder is non-negative; any non-zero remainder sets the sticky
     * bit.
     */
    twiceRoot = rootSig << 1;
    mul64To128(rootSig, rootSig, &sqHi, &sqLo);
    sub128(aSig, 0, sqHi, sqLo, &remHi, &remLo);
    while ((int64_t) remHi < 0) {
        --rootSig;
        twiceRoot -= 2;
        add128(remHi, remLo, rootSig >> 63, twiceRoot | 1, &remHi, &remLo);
    }
    if ((remHi | remLo) != 0) {
        rootSig |= 1;
    }
    return roundAndPackFloat64(0, rootExp, rootSig STATUS_VAR);

}
4089 158142c2 bellard
4090 158142c2 bellard
/*----------------------------------------------------------------------------
4091 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
4092 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
4093 374dfc33 aurel32
| Floating-Point Arithmetic.
4094 374dfc33 aurel32
*----------------------------------------------------------------------------*/
4095 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
{
    flag aSign, zSign;
    int_fast16_t aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* NaN and infinity are handled before the sign test, so that a NaN
     * whose sign bit happens to be set is propagated like any other NaN
     * instead of being replaced by the default NaN with the invalid flag
     * raised.
     */
    if ( aExp == 0x7FF ) {
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
        if ( ! aSign ) return a;                    /* log2(+inf) = +inf */
        float_raise( float_flag_invalid STATUS_VAR);    /* log2(-inf) */
        return float64_default_nan;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* log2(+/-0) = -inf, signalling divide-by-zero as IEEE
             * 754-2008 requires for the logarithm of zero.
             */
            float_raise( float_flag_divbyzero STATUS_VAR);
            return packFloat64( 1, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* Logarithm of a negative non-zero number */
        float_raise( float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* The integer part of the result is the unbiased exponent; the loop
     * below produces the 52 fraction bits one at a time by repeatedly
     * squaring the significand and recording whether the square reached 2.
     */
    aExp -= 0x3FF;
    aSig |= LIT64( 0x0010000000000000 );
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* zSig holds a two's-complement fixed-point value; convert it to
     * sign and magnitude before packing.
     */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
}
4136 374dfc33 aurel32
4137 374dfc33 aurel32
/*----------------------------------------------------------------------------
4138 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
4139 b689362d Aurelien Jarno
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
4140 b689362d Aurelien Jarno
| if either operand is a NaN.  Otherwise, the comparison is performed
4141 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4142 158142c2 bellard
*----------------------------------------------------------------------------*/
4143 158142c2 bellard
4144 b689362d Aurelien Jarno
int float64_eq( float64 a, float64 b STATUS_PARAM )
{
    uint64_t aBits, bBits;
    flag aIsNaN, bIsNaN;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has the all-ones exponent and a non-zero fraction. */
    aIsNaN = (extractFloat64Exp(a) == 0x7FF) && (extractFloat64Frac(a) != 0);
    bIsNaN = (extractFloat64Exp(b) == 0x7FF) && (extractFloat64Frac(b) != 0);
    if (aIsNaN || bIsNaN) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aBits = float64_val(a);
    bBits = float64_val(b);
    if (aBits == bBits) {
        return 1;
    }
    /* Different encodings compare equal only when both are zeroes, i.e.
     * nothing but the sign bits is set.
     */
    return (uint64_t) ((aBits | bBits) << 1) == 0;

}
4161 158142c2 bellard
4162 158142c2 bellard
/*----------------------------------------------------------------------------
4163 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
4164 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  The invalid
4165 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
4166 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4167 158142c2 bellard
*----------------------------------------------------------------------------*/
4168 158142c2 bellard
4169 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
{
    flag aSign, bSign;
    flag aIsNaN, bIsNaN;
    uint64_t aBits, bBits;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has the all-ones exponent and a non-zero fraction. */
    aIsNaN = (extractFloat64Exp(a) == 0x7FF) && (extractFloat64Frac(a) != 0);
    bIsNaN = (extractFloat64Exp(b) == 0x7FF) && (extractFloat64Frac(b) != 0);
    if (aIsNaN || bIsNaN) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aSign = extractFloat64Sign(a);
    bSign = extractFloat64Sign(b);
    aBits = float64_val(a);
    bBits = float64_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true when a is the negative one, or when both
         * operands are zeroes.
         */
        return aSign || ((uint64_t) ((aBits | bBits) << 1) == 0);
    }
    if (aBits == bBits) {
        return 1;
    }
    /* Same sign: the raw-encoding order is reversed for negatives. */
    return (aSign ^ (aBits < bBits)) != 0;

}
4190 158142c2 bellard
4191 158142c2 bellard
/*----------------------------------------------------------------------------
4192 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
4193 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
4194 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
4195 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4196 158142c2 bellard
*----------------------------------------------------------------------------*/
4197 158142c2 bellard
4198 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t rawA, rawB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* Signaling predicate: a NaN in either operand (maximum exponent with
     * a non-zero fraction) raises invalid and the comparison is false. */
    if ((extractFloat64Exp(a) == 0x7FF && extractFloat64Frac(a))
        || (extractFloat64Exp(b) == 0x7FF && extractFloat64Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    signA = extractFloat64Sign(a);
    signB = extractFloat64Sign(b);
    rawA = float64_val(a);
    rawB = float64_val(b);

    if (signA != signB) {
        /* Opposite signs: true only when `a' carries the sign bit and the
         * operands are not both zero (some bit below the sign is set). */
        return signA && ((uint64_t)((rawA | rawB) << 1) != 0);
    }

    /* Same sign: identical encodings are never "less than"; otherwise the
     * unsigned ordering of the encodings is inverted for a set sign bit. */
    return (rawA != rawB) && (signA ^ (rawA < rawB));
}
4219 158142c2 bellard
4220 158142c2 bellard
/*----------------------------------------------------------------------------
4221 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4222 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
4223 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
4224 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
4225 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4226 67b7861d Aurelien Jarno
4227 67b7861d Aurelien Jarno
int float64_unordered( float64 a, float64 b STATUS_PARAM )
{
    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* Ordered case first: neither operand has the maximum exponent together
     * with a non-zero fraction, i.e. neither is a NaN. */
    if ((extractFloat64Exp(a) != 0x7FF || !extractFloat64Frac(a))
        && (extractFloat64Exp(b) != 0x7FF || !extractFloat64Frac(b))) {
        return 0;
    }

    /* Signaling predicate: any NaN operand raises the invalid exception. */
    float_raise(float_flag_invalid STATUS_VAR);
    return 1;
}
4240 67b7861d Aurelien Jarno
4241 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4242 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
4243 f5a64251 Aurelien Jarno
| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
4244 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
4245 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
4246 158142c2 bellard
*----------------------------------------------------------------------------*/
4247 158142c2 bellard
4248 b689362d Aurelien Jarno
int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
{
    uint64_t rawA, rawB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat64Exp(a) == 0x7FF && extractFloat64Frac(a))
        || (extractFloat64Exp(b) == 0x7FF && extractFloat64Frac(b))) {
        /* Quiet predicate: a NaN operand makes the result false, but only
         * a signaling NaN raises the invalid exception. */
        if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    rawA = float64_val(a);
    rawB = float64_val(b);

    /* Equal when the encodings match, or when all bits below the sign are
     * clear in both operands (+0 and -0 compare equal). */
    return (rawA == rawB) || ((uint64_t)((rawA | rawB) << 1) == 0);
}
4267 158142c2 bellard
4268 158142c2 bellard
/*----------------------------------------------------------------------------
4269 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
4270 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
4271 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
4272 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4273 158142c2 bellard
*----------------------------------------------------------------------------*/
4274 158142c2 bellard
4275 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t rawA, rawB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat64Exp(a) == 0x7FF && extractFloat64Frac(a))
        || (extractFloat64Exp(b) == 0x7FF && extractFloat64Frac(b))) {
        /* Quiet predicate: a NaN operand makes the result false, but only
         * a signaling NaN raises the invalid exception. */
        if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    signA = extractFloat64Sign(a);
    signB = extractFloat64Sign(b);
    rawA = float64_val(a);
    rawB = float64_val(b);

    if (signA != signB) {
        /* Opposite signs: true when `a' carries the sign bit, or when all
         * bits below the sign are clear in both operands (+0 versus -0). */
        return signA || ((uint64_t)((rawA | rawB) << 1) == 0);
    }

    /* Same sign: identical encodings are equal; otherwise the unsigned
     * ordering of the encodings is inverted when the sign bit is set. */
    return (rawA == rawB) || (signA ^ (rawA < rawB));
}
4298 158142c2 bellard
4299 158142c2 bellard
/*----------------------------------------------------------------------------
4300 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
4301 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
4302 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
4303 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4304 158142c2 bellard
*----------------------------------------------------------------------------*/
4305 158142c2 bellard
4306 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t rawA, rawB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat64Exp(a) == 0x7FF && extractFloat64Frac(a))
        || (extractFloat64Exp(b) == 0x7FF && extractFloat64Frac(b))) {
        /* Quiet predicate: a NaN operand makes the result false, but only
         * a signaling NaN raises the invalid exception. */
        if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    signA = extractFloat64Sign(a);
    signB = extractFloat64Sign(b);
    rawA = float64_val(a);
    rawB = float64_val(b);

    if (signA != signB) {
        /* Opposite signs: true only when `a' carries the sign bit and the
         * operands are not both zero (some bit below the sign is set). */
        return signA && ((uint64_t)((rawA | rawB) << 1) != 0);
    }

    /* Same sign: identical encodings are never "less than"; otherwise the
     * unsigned ordering of the encodings is inverted for a set sign bit. */
    return (rawA != rawB) && (signA ^ (rawA < rawB));
}
4329 158142c2 bellard
4330 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4331 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4332 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
4333 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
4334 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
4335 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4336 67b7861d Aurelien Jarno
4337 67b7861d Aurelien Jarno
int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
{
    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* Ordered case first: neither operand has the maximum exponent together
     * with a non-zero fraction, i.e. neither is a NaN. */
    if ((extractFloat64Exp(a) != 0x7FF || !extractFloat64Frac(a))
        && (extractFloat64Exp(b) != 0x7FF || !extractFloat64Frac(b))) {
        return 0;
    }

    /* Quiet predicate: only a signaling NaN raises the invalid exception. */
    if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b)) {
        float_raise(float_flag_invalid STATUS_VAR);
    }
    return 1;
}
4352 67b7861d Aurelien Jarno
4353 158142c2 bellard
/*----------------------------------------------------------------------------
4354 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4355 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
4356 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4357 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
4358 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
4359 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
4360 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
4361 158142c2 bellard
*----------------------------------------------------------------------------*/
4362 158142c2 bellard
4363 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp, shiftCount;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* A NaN (maximum exponent, bits set below the top significand bit) is
     * treated as positive before the value is handed to the rounder. */
    if ((aExp == 0x7FFF) && (uint64_t)(aSig << 1)) {
        aSign = 0;
    }

    /* Align the significand for roundAndPackInt32(); the shift distance is
     * never allowed to drop below 1. */
    shiftCount = 0x4037 - aExp;
    if (shiftCount <= 0) {
        shiftCount = 1;
    }
    shift64RightJamming(aSig, shiftCount, &aSig);

    return roundAndPackInt32(aSign, aSig STATUS_VAR);
}
4379 158142c2 bellard
4380 158142c2 bellard
/*----------------------------------------------------------------------------
4381 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4382 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
4383 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4384 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
4385 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
4386 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
4387 158142c2 bellard
| sign as `a' is returned.
4388 158142c2 bellard
*----------------------------------------------------------------------------*/
4389 158142c2 bellard
4390 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp, shiftCount;
    uint64_t aSig, origSig;
    int32_t result;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp > 0x401E) {
        /* Exponent beyond what is converted below (this includes the
         * maximum exponent).  A NaN is forced to the positive result. */
        if ((aExp == 0x7FFF) && (uint64_t)(aSig << 1)) {
            aSign = 0;
        }
        goto invalid;
    }
    if (aExp < 0x3FFF) {
        /* The truncated result is 0; inexact unless the operand is zero. */
        if (aExp || aSig) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    shiftCount = 0x403E - aExp;
    origSig = aSig;
    aSig >>= shiftCount;
    result = aSig;
    if (aSign) {
        result = -result;
    }

    /* The conversion is valid only when the sign of the integer agrees
     * with the sign of the operand. */
    if (!((result < 0) ^ aSign)) {
        /* Bits discarded by the right shift make the result inexact. */
        if ((aSig << shiftCount) != origSig) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return result;
    }

 invalid:
    float_raise(float_flag_invalid STATUS_VAR);
    return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
}
4424 158142c2 bellard
4425 158142c2 bellard
/*----------------------------------------------------------------------------
4426 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4427 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
4428 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4429 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
4430 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
4431 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
4432 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
4433 158142c2 bellard
*----------------------------------------------------------------------------*/
4434 158142c2 bellard
4435 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    shiftCount = 0x403E - aExp;

    if (shiftCount < 0) {
        /* Exponent above 0x403E: invalid.  The most negative integer is
         * returned only for a negative operand that is not a NaN; every
         * other case yields the most positive integer. */
        float_raise(float_flag_invalid STATUS_VAR);
        if (aSign
            && ((aExp != 0x7FFF)
                || (aSig == LIT64(0x8000000000000000)))) {
            return (int64_t) LIT64(0x8000000000000000);
        }
        return LIT64(0x7FFFFFFFFFFFFFFF);
    }

    if (shiftCount == 0) {
        /* Significand is already aligned; nothing is shifted out. */
        aSigExtra = 0;
    } else {
        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
    }

    return roundAndPackInt64(aSign, aSig, aSigExtra STATUS_VAR);
}
4464 158142c2 bellard
4465 158142c2 bellard
/*----------------------------------------------------------------------------
4466 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4467 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
4468 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4469 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
4470 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
4471 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
4472 158142c2 bellard
| sign as `a' is returned.
4473 158142c2 bellard
*----------------------------------------------------------------------------*/
4474 158142c2 bellard
4475 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp, shiftCount;
    uint64_t aSig;
    int64 result;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    shiftCount = aExp - 0x403E;

    if (shiftCount >= 0) {
        /* Drop the top significand bit so only the bits below it remain. */
        aSig &= LIT64(0x7FFFFFFFFFFFFFFF);
        /* Everything except high word 0xC03E with no remaining significand
         * bits is invalid; that one encoding falls through silently. */
        if ((a.high != 0xC03E) || aSig) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!aSign || ((aExp == 0x7FFF) && aSig)) {
                return LIT64(0x7FFFFFFFFFFFFFFF);
            }
        }
        return (int64_t) LIT64(0x8000000000000000);
    }
    if (aExp < 0x3FFF) {
        /* The truncated result is 0; inexact unless the operand is zero. */
        if (aExp || aSig) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* shiftCount is negative here, so this is a right shift by 1..63. */
    result = aSig >> (-shiftCount);
    /* Any bit that the right shift discarded makes the result inexact. */
    if ((uint64_t)(aSig << (shiftCount & 63))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if (aSign) {
        result = -result;
    }
    return result;
}
4508 158142c2 bellard
4509 158142c2 bellard
/*----------------------------------------------------------------------------
4510 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4511 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
4512 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4513 158142c2 bellard
| Floating-Point Arithmetic.
4514 158142c2 bellard
*----------------------------------------------------------------------------*/
4515 158142c2 bellard
4516 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp != 0x7FFF) {
        /* Finite operand: narrow the significand (sticky shift), adjust
         * the exponent unless the value is zero, then round and pack. */
        shift64RightJamming(aSig, 33, &aSig);
        if (aExp || aSig) {
            aExp -= 0x3F81;
        }
        return roundAndPackFloat32(aSign, aExp, aSig STATUS_VAR);
    }

    /* Maximum exponent: NaN if any bit below the top significand bit is
     * set, otherwise an infinity of the same sign. */
    if ((uint64_t)(aSig << 1)) {
        return commonNaNToFloat32(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    return packFloat32(aSign, 0xFF, 0);
}
4536 158142c2 bellard
4537 158142c2 bellard
/*----------------------------------------------------------------------------
4538 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4539 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
4540 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4541 158142c2 bellard
| Floating-Point Arithmetic.
4542 158142c2 bellard
*----------------------------------------------------------------------------*/
4543 158142c2 bellard
4544 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    uint64_t aSig, zSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp != 0x7FFF) {
        /* Finite operand: sticky shift by one bit, adjust the exponent
         * unless the value is zero, then round and pack. */
        shift64RightJamming(aSig, 1, &zSig);
        if (aExp || aSig) {
            aExp -= 0x3C01;
        }
        return roundAndPackFloat64(aSign, aExp, zSig STATUS_VAR);
    }

    /* Maximum exponent: NaN if any bit below the top significand bit is
     * set, otherwise an infinity of the same sign. */
    if ((uint64_t)(aSig << 1)) {
        return commonNaNToFloat64(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    return packFloat64(aSign, 0x7FF, 0);
}
4564 158142c2 bellard
4565 158142c2 bellard
/*----------------------------------------------------------------------------
4566 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4567 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
4568 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4569 158142c2 bellard
| Floating-Point Arithmetic.
4570 158142c2 bellard
*----------------------------------------------------------------------------*/
4571 158142c2 bellard
4572 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int_fast16_t aExp;
    uint64_t aSig, zSig0, zSig1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* NaN: maximum exponent with bits set below the top significand bit. */
    if (aExp == 0x7FFF) {
        if ((uint64_t)(aSig << 1)) {
            return commonNaNToFloat128(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
    }

    /* Discard the top significand bit, then spread the remaining bits over
     * the two result words; the exponent is reused unchanged. */
    shift128Right(aSig << 1, 0, 16, &zSig0, &zSig1);
    return packFloat128(aSign, aExp, zSig0, zSig1);
}
4588 158142c2 bellard
4589 158142c2 bellard
/*----------------------------------------------------------------------------
4590 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
4591 158142c2 bellard
| and returns the result as an extended double-precision floating-point
4592 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
4593 158142c2 bellard
| Binary Floating-Point Arithmetic.
4594 158142c2 bellard
*----------------------------------------------------------------------------*/
4595 158142c2 bellard
4596 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    uint64_t lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;

    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* Nothing to round at this exponent: NaNs are propagated, every
         * other operand is returned unchanged. */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* Only a true zero is returned as-is.  The whole significand is
         * tested, including its top bit: an operand with a zero exponent
         * and only the top significand bit set (a pseudo-denormal) is a
         * non-zero value and must be rounded, raising inexact. */
        if (    ( aExp == 0 )
             && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        aSign = extractFloatx80Sign( a );
        switch ( STATUS(float_rounding_mode) ) {
         case float_round_nearest_even:
            /* Exponent 0x3FFE with bits set below the top significand bit
             * rounds to exponent 0x3FFF with only the top bit set. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        /* All remaining cases give a zero carrying the operand's sign. */
        return packFloatx80( aSign, 0, 0 );
    }
    /* lastBitMask selects the lowest significand bit kept in the result;
     * roundBitsMask covers every bit below it. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = STATUS(float_rounding_mode);
    if ( roundingMode == float_round_nearest_even ) {
        /* Add half of the last kept bit; if the discarded bits are then
         * all zero, clear the last kept bit. */
        z.low += lastBitMask>>1;
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Directed rounding: bump the significand when the sign bit and
         * "rounding up" differ. */
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z.low += roundBitsMask;
        }
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        /* The significand wrapped to zero: step the high word and restore
         * the top significand bit. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;

}
4661 158142c2 bellard
4662 158142c2 bellard
/*----------------------------------------------------------------------------
4663 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
4664 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
4665 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
4666 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4667 158142c2 bellard
| Floating-Point Arithmetic.
4668 158142c2 bellard
*----------------------------------------------------------------------------*/
4669 158142c2 bellard
4670 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* A zero exponent in `b' shortens the alignment shift by one. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent. */
        if ( bExp == 0x7FFF ) {
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        /* A zero exponent in `a' shortens the alignment shift by one. */
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            /* With a zero exponent the top significand bit may still be
             * set in an operand (pseudo-denormal).  If the 64-bit sum then
             * wrapped, take the carry path with exponent 1 instead of
             * losing the carry. */
            if (    ( ( aSig | bSig ) & LIT64( 0x8000000000000000 ) )
                 && ( zSig0 < aSig ) ) {
                zExp = 1;
                goto shiftRight1;
            }
            /* A zero sum is returned directly.  NOTE(review): the helper
             * below presumably shifts by the leading-zero count, which
             * would be 64 for a zero significand -- confirm. */
            if ( zSig0 == 0 ) {
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit still set: no carry out, the sum is already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the 64-bit significand: shift right by one (sticky),
     * restore the top bit and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
4727 158142c2 bellard
4728 158142c2 bellard
/*----------------------------------------------------------------------------
4729 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
4730 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
4731 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4732 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4733 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4734 158142c2 bellard
*----------------------------------------------------------------------------*/
4735 158142c2 bellard
4736 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4737 158142c2 bellard
{
4738 158142c2 bellard
    int32 aExp, bExp, zExp;
4739 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4740 158142c2 bellard
    int32 expDiff;
4741 158142c2 bellard
    floatx80 z;
4742 158142c2 bellard
4743 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4744 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4745 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4746 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4747 158142c2 bellard
    expDiff = aExp - bExp;
4748 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4749 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
4750 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4751 bb98fe42 Andreas Färber
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
4752 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4753 158142c2 bellard
        }
4754 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4755 158142c2 bellard
        z.low = floatx80_default_nan_low;
4756 158142c2 bellard
        z.high = floatx80_default_nan_high;
4757 158142c2 bellard
        return z;
4758 158142c2 bellard
    }
4759 158142c2 bellard
    if ( aExp == 0 ) {
4760 158142c2 bellard
        aExp = 1;
4761 158142c2 bellard
        bExp = 1;
4762 158142c2 bellard
    }
4763 158142c2 bellard
    zSig1 = 0;
4764 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
4765 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
4766 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
4767 158142c2 bellard
 bExpBigger:
4768 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4769 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4770 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
4771 158142c2 bellard
    }
4772 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
4773 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4774 158142c2 bellard
 bBigger:
4775 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4776 158142c2 bellard
    zExp = bExp;
4777 158142c2 bellard
    zSign ^= 1;
4778 158142c2 bellard
    goto normalizeRoundAndPack;
4779 158142c2 bellard
 aExpBigger:
4780 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4781 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4782 158142c2 bellard
        return a;
4783 158142c2 bellard
    }
4784 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
4785 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4786 158142c2 bellard
 aBigger:
4787 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4788 158142c2 bellard
    zExp = aExp;
4789 158142c2 bellard
 normalizeRoundAndPack:
4790 158142c2 bellard
    return
4791 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4792 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4793 158142c2 bellard
4794 158142c2 bellard
}
4795 158142c2 bellard
4796 158142c2 bellard
/*----------------------------------------------------------------------------
4797 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
4798 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4799 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4800 158142c2 bellard
*----------------------------------------------------------------------------*/
4801 158142c2 bellard
4802 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
4803 158142c2 bellard
{
4804 158142c2 bellard
    flag aSign, bSign;
4805 158142c2 bellard
4806 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4807 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4808 158142c2 bellard
    if ( aSign == bSign ) {
4809 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4810 158142c2 bellard
    }
4811 158142c2 bellard
    else {
4812 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4813 158142c2 bellard
    }
4814 158142c2 bellard
4815 158142c2 bellard
}
4816 158142c2 bellard
4817 158142c2 bellard
/*----------------------------------------------------------------------------
4818 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
4819 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4820 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4821 158142c2 bellard
*----------------------------------------------------------------------------*/
4822 158142c2 bellard
4823 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
4824 158142c2 bellard
{
4825 158142c2 bellard
    flag aSign, bSign;
4826 158142c2 bellard
4827 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4828 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4829 158142c2 bellard
    if ( aSign == bSign ) {
4830 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4831 158142c2 bellard
    }
4832 158142c2 bellard
    else {
4833 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4834 158142c2 bellard
    }
4835 158142c2 bellard
4836 158142c2 bellard
}
4837 158142c2 bellard
4838 158142c2 bellard
/*----------------------------------------------------------------------------
4839 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
4840 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4841 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4842 158142c2 bellard
*----------------------------------------------------------------------------*/
4843 158142c2 bellard
4844 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
4845 158142c2 bellard
{
4846 158142c2 bellard
    flag aSign, bSign, zSign;
4847 158142c2 bellard
    int32 aExp, bExp, zExp;
4848 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4849 158142c2 bellard
    floatx80 z;
4850 158142c2 bellard
4851 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4852 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4853 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4854 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4855 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4856 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4857 158142c2 bellard
    zSign = aSign ^ bSign;
4858 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4859 bb98fe42 Andreas Färber
        if (    (uint64_t) ( aSig<<1 )
4860 bb98fe42 Andreas Färber
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4861 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4862 158142c2 bellard
        }
4863 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) goto invalid;
4864 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4865 158142c2 bellard
    }
4866 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4867 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4868 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
4869 158142c2 bellard
 invalid:
4870 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4871 158142c2 bellard
            z.low = floatx80_default_nan_low;
4872 158142c2 bellard
            z.high = floatx80_default_nan_high;
4873 158142c2 bellard
            return z;
4874 158142c2 bellard
        }
4875 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4876 158142c2 bellard
    }
4877 158142c2 bellard
    if ( aExp == 0 ) {
4878 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4879 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4880 158142c2 bellard
    }
4881 158142c2 bellard
    if ( bExp == 0 ) {
4882 158142c2 bellard
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4883 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4884 158142c2 bellard
    }
4885 158142c2 bellard
    zExp = aExp + bExp - 0x3FFE;
4886 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
4887 bb98fe42 Andreas Färber
    if ( 0 < (int64_t) zSig0 ) {
4888 158142c2 bellard
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4889 158142c2 bellard
        --zExp;
4890 158142c2 bellard
    }
4891 158142c2 bellard
    return
4892 158142c2 bellard
        roundAndPackFloatx80(
4893 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4894 158142c2 bellard
4895 158142c2 bellard
}
4896 158142c2 bellard
4897 158142c2 bellard
/*----------------------------------------------------------------------------
4898 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
4899 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
4900 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4901 158142c2 bellard
*----------------------------------------------------------------------------*/
4902 158142c2 bellard
4903 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4904 158142c2 bellard
{
4905 158142c2 bellard
    flag aSign, bSign, zSign;
4906 158142c2 bellard
    int32 aExp, bExp, zExp;
4907 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4908 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, term0, term1, term2;
4909 158142c2 bellard
    floatx80 z;
4910 158142c2 bellard
4911 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4912 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4913 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4914 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4915 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4916 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4917 158142c2 bellard
    zSign = aSign ^ bSign;
4918 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4919 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4920 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4921 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4922 158142c2 bellard
            goto invalid;
4923 158142c2 bellard
        }
4924 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4925 158142c2 bellard
    }
4926 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4927 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4928 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
4929 158142c2 bellard
    }
4930 158142c2 bellard
    if ( bExp == 0 ) {
4931 158142c2 bellard
        if ( bSig == 0 ) {
4932 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
4933 158142c2 bellard
 invalid:
4934 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4935 158142c2 bellard
                z.low = floatx80_default_nan_low;
4936 158142c2 bellard
                z.high = floatx80_default_nan_high;
4937 158142c2 bellard
                return z;
4938 158142c2 bellard
            }
4939 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4940 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4941 158142c2 bellard
        }
4942 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4943 158142c2 bellard
    }
4944 158142c2 bellard
    if ( aExp == 0 ) {
4945 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4946 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4947 158142c2 bellard
    }
4948 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
4949 158142c2 bellard
    rem1 = 0;
4950 158142c2 bellard
    if ( bSig <= aSig ) {
4951 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4952 158142c2 bellard
        ++zExp;
4953 158142c2 bellard
    }
4954 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4955 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4956 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4957 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4958 158142c2 bellard
        --zSig0;
4959 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4960 158142c2 bellard
    }
4961 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4962 bb98fe42 Andreas Färber
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4963 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4964 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4965 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4966 158142c2 bellard
            --zSig1;
4967 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4968 158142c2 bellard
        }
4969 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4970 158142c2 bellard
    }
4971 158142c2 bellard
    return
4972 158142c2 bellard
        roundAndPackFloatx80(
4973 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4974 158142c2 bellard
4975 158142c2 bellard
}
4976 158142c2 bellard
4977 158142c2 bellard
/*----------------------------------------------------------------------------
4978 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4979 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4980 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4981 158142c2 bellard
*----------------------------------------------------------------------------*/
4982 158142c2 bellard
4983 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4984 158142c2 bellard
{
4985 ed086f3d Blue Swirl
    flag aSign, zSign;
4986 158142c2 bellard
    int32 aExp, bExp, expDiff;
4987 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig;
4988 bb98fe42 Andreas Färber
    uint64_t q, term0, term1, alternateASig0, alternateASig1;
4989 158142c2 bellard
    floatx80 z;
4990 158142c2 bellard
4991 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4992 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4993 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4994 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4995 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4996 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4997 bb98fe42 Andreas Färber
        if (    (uint64_t) ( aSig0<<1 )
4998 bb98fe42 Andreas Färber
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4999 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
5000 158142c2 bellard
        }
5001 158142c2 bellard
        goto invalid;
5002 158142c2 bellard
    }
5003 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5004 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
5005 158142c2 bellard
        return a;
5006 158142c2 bellard
    }
5007 158142c2 bellard
    if ( bExp == 0 ) {
5008 158142c2 bellard
        if ( bSig == 0 ) {
5009 158142c2 bellard
 invalid:
5010 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5011 158142c2 bellard
            z.low = floatx80_default_nan_low;
5012 158142c2 bellard
            z.high = floatx80_default_nan_high;
5013 158142c2 bellard
            return z;
5014 158142c2 bellard
        }
5015 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5016 158142c2 bellard
    }
5017 158142c2 bellard
    if ( aExp == 0 ) {
5018 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5019 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5020 158142c2 bellard
    }
5021 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
5022 158142c2 bellard
    zSign = aSign;
5023 158142c2 bellard
    expDiff = aExp - bExp;
5024 158142c2 bellard
    aSig1 = 0;
5025 158142c2 bellard
    if ( expDiff < 0 ) {
5026 158142c2 bellard
        if ( expDiff < -1 ) return a;
5027 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5028 158142c2 bellard
        expDiff = 0;
5029 158142c2 bellard
    }
5030 158142c2 bellard
    q = ( bSig <= aSig0 );
5031 158142c2 bellard
    if ( q ) aSig0 -= bSig;
5032 158142c2 bellard
    expDiff -= 64;
5033 158142c2 bellard
    while ( 0 < expDiff ) {
5034 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
5035 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
5036 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
5037 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5038 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5039 158142c2 bellard
        expDiff -= 62;
5040 158142c2 bellard
    }
5041 158142c2 bellard
    expDiff += 64;
5042 158142c2 bellard
    if ( 0 < expDiff ) {
5043 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
5044 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
5045 158142c2 bellard
        q >>= 64 - expDiff;
5046 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5047 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5048 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5049 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
5050 158142c2 bellard
            ++q;
5051 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5052 158142c2 bellard
        }
5053 158142c2 bellard
    }
5054 158142c2 bellard
    else {
5055 158142c2 bellard
        term1 = 0;
5056 158142c2 bellard
        term0 = bSig;
5057 158142c2 bellard
    }
5058 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5059 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5060 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5061 158142c2 bellard
              && ( q & 1 ) )
5062 158142c2 bellard
       ) {
5063 158142c2 bellard
        aSig0 = alternateASig0;
5064 158142c2 bellard
        aSig1 = alternateASig1;
5065 158142c2 bellard
        zSign = ! zSign;
5066 158142c2 bellard
    }
5067 158142c2 bellard
    return
5068 158142c2 bellard
        normalizeRoundAndPackFloatx80(
5069 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
5070 158142c2 bellard
5071 158142c2 bellard
}
5072 158142c2 bellard
5073 158142c2 bellard
/*----------------------------------------------------------------------------
5074 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
5075 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
5076 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5077 158142c2 bellard
*----------------------------------------------------------------------------*/
5078 158142c2 bellard
5079 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
5080 158142c2 bellard
{
5081 158142c2 bellard
    flag aSign;
5082 158142c2 bellard
    int32 aExp, zExp;
5083 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5084 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5085 158142c2 bellard
    floatx80 z;
5086 158142c2 bellard
5087 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
5088 158142c2 bellard
    aExp = extractFloatx80Exp( a );
5089 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5090 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5091 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
5092 158142c2 bellard
        if ( ! aSign ) return a;
5093 158142c2 bellard
        goto invalid;
5094 158142c2 bellard
    }
5095 158142c2 bellard
    if ( aSign ) {
5096 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
5097 158142c2 bellard
 invalid:
5098 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5099 158142c2 bellard
        z.low = floatx80_default_nan_low;
5100 158142c2 bellard
        z.high = floatx80_default_nan_high;
5101 158142c2 bellard
        return z;
5102 158142c2 bellard
    }
5103 158142c2 bellard
    if ( aExp == 0 ) {
5104 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5105 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5106 158142c2 bellard
    }
5107 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5108 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5109 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5110 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5111 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5112 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5113 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5114 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5115 158142c2 bellard
        --zSig0;
5116 158142c2 bellard
        doubleZSig0 -= 2;
5117 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5118 158142c2 bellard
    }
5119 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5120 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5121 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5122 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5123 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5124 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5125 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5126 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5127 158142c2 bellard
            --zSig1;
5128 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5129 158142c2 bellard
            term3 |= 1;
5130 158142c2 bellard
            term2 |= doubleZSig0;
5131 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5132 158142c2 bellard
        }
5133 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5134 158142c2 bellard
    }
5135 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5136 158142c2 bellard
    zSig0 |= doubleZSig0;
5137 158142c2 bellard
    return
5138 158142c2 bellard
        roundAndPackFloatx80(
5139 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
5140 158142c2 bellard
5141 158142c2 bellard
}
5142 158142c2 bellard
5143 158142c2 bellard
/*----------------------------------------------------------------------------
5144 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is equal
5145 b689362d Aurelien Jarno
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
5146 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5147 b689362d Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5148 158142c2 bellard
*----------------------------------------------------------------------------*/
5149 158142c2 bellard
5150 b689362d Aurelien Jarno
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
5151 158142c2 bellard
{
5152 158142c2 bellard
5153 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5154 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5155 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5156 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5157 158142c2 bellard
       ) {
5158 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5159 158142c2 bellard
        return 0;
5160 158142c2 bellard
    }
5161 158142c2 bellard
    return
5162 158142c2 bellard
           ( a.low == b.low )
5163 158142c2 bellard
        && (    ( a.high == b.high )
5164 158142c2 bellard
             || (    ( a.low == 0 )
5165 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5166 158142c2 bellard
           );
5167 158142c2 bellard
5168 158142c2 bellard
}
5169 158142c2 bellard
5170 158142c2 bellard
/*----------------------------------------------------------------------------
5171 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
5172 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
5173 f5a64251 Aurelien Jarno
| invalid exception is raised if either operand is a NaN.  The comparison is
5174 f5a64251 Aurelien Jarno
| performed according to the IEC/IEEE Standard for Binary Floating-Point
5175 f5a64251 Aurelien Jarno
| Arithmetic.
5176 158142c2 bellard
*----------------------------------------------------------------------------*/
5177 158142c2 bellard
5178 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
5179 158142c2 bellard
{
5180 158142c2 bellard
    flag aSign, bSign;
5181 158142c2 bellard
5182 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5183 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5184 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5185 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5186 158142c2 bellard
       ) {
5187 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5188 158142c2 bellard
        return 0;
5189 158142c2 bellard
    }
5190 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5191 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5192 158142c2 bellard
    if ( aSign != bSign ) {
5193 158142c2 bellard
        return
5194 158142c2 bellard
               aSign
5195 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5196 158142c2 bellard
                 == 0 );
5197 158142c2 bellard
    }
5198 158142c2 bellard
    return
5199 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5200 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5201 158142c2 bellard
5202 158142c2 bellard
}
5203 158142c2 bellard
5204 158142c2 bellard
/*----------------------------------------------------------------------------
5205 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
5206 f5a64251 Aurelien Jarno
| less than the corresponding value `b', and 0 otherwise.  The invalid
5207 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
5208 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5209 158142c2 bellard
*----------------------------------------------------------------------------*/
5210 158142c2 bellard
5211 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
5212 158142c2 bellard
{
5213 158142c2 bellard
    flag aSign, bSign;
5214 158142c2 bellard
5215 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5216 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5217 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5218 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5219 158142c2 bellard
       ) {
5220 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5221 158142c2 bellard
        return 0;
5222 158142c2 bellard
    }
5223 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5224 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5225 158142c2 bellard
    if ( aSign != bSign ) {
5226 158142c2 bellard
        return
5227 158142c2 bellard
               aSign
5228 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5229 158142c2 bellard
                 != 0 );
5230 158142c2 bellard
    }
5231 158142c2 bellard
    return
5232 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5233 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5234 158142c2 bellard
5235 158142c2 bellard
}
5236 158142c2 bellard
5237 158142c2 bellard
/*----------------------------------------------------------------------------
5238 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
5239 f5a64251 Aurelien Jarno
| cannot be compared, and 0 otherwise.  The invalid exception is raised if
5240 f5a64251 Aurelien Jarno
| either operand is a NaN.   The comparison is performed according to the
5241 f5a64251 Aurelien Jarno
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5242 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5243 67b7861d Aurelien Jarno
int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
5244 67b7861d Aurelien Jarno
{
5245 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5246 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5247 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5248 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5249 67b7861d Aurelien Jarno
       ) {
5250 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5251 67b7861d Aurelien Jarno
        return 1;
5252 67b7861d Aurelien Jarno
    }
5253 67b7861d Aurelien Jarno
    return 0;
5254 67b7861d Aurelien Jarno
}
5255 67b7861d Aurelien Jarno
5256 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5257 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is
5258 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5259 f5a64251 Aurelien Jarno
| cause an exception.  The comparison is performed according to the IEC/IEEE
5260 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
5261 158142c2 bellard
*----------------------------------------------------------------------------*/
5262 158142c2 bellard
5263 b689362d Aurelien Jarno
int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5264 158142c2 bellard
{
5265 158142c2 bellard
5266 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5267 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5268 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5269 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5270 158142c2 bellard
       ) {
5271 b689362d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
5272 b689362d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
5273 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5274 b689362d Aurelien Jarno
        }
5275 158142c2 bellard
        return 0;
5276 158142c2 bellard
    }
5277 158142c2 bellard
    return
5278 158142c2 bellard
           ( a.low == b.low )
5279 158142c2 bellard
        && (    ( a.high == b.high )
5280 158142c2 bellard
             || (    ( a.low == 0 )
5281 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5282 158142c2 bellard
           );
5283 158142c2 bellard
5284 158142c2 bellard
}
5285 158142c2 bellard
5286 158142c2 bellard
/*----------------------------------------------------------------------------
5287 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
5288 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
5289 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
5290 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5291 158142c2 bellard
*----------------------------------------------------------------------------*/
5292 158142c2 bellard
5293 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5294 158142c2 bellard
{
5295 158142c2 bellard
    flag aSign, bSign;
5296 158142c2 bellard
5297 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5298 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5299 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5300 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5301 158142c2 bellard
       ) {
5302 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
5303 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
5304 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5305 158142c2 bellard
        }
5306 158142c2 bellard
        return 0;
5307 158142c2 bellard
    }
5308 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5309 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5310 158142c2 bellard
    if ( aSign != bSign ) {
5311 158142c2 bellard
        return
5312 158142c2 bellard
               aSign
5313 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5314 158142c2 bellard
                 == 0 );
5315 158142c2 bellard
    }
5316 158142c2 bellard
    return
5317 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5318 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5319 158142c2 bellard
5320 158142c2 bellard
}
5321 158142c2 bellard
5322 158142c2 bellard
/*----------------------------------------------------------------------------
5323 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
5324 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
5325 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
5326 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5327 158142c2 bellard
*----------------------------------------------------------------------------*/
5328 158142c2 bellard
5329 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5330 158142c2 bellard
{
5331 158142c2 bellard
    flag aSign, bSign;
5332 158142c2 bellard
5333 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5334 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5335 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5336 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5337 158142c2 bellard
       ) {
5338 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
5339 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
5340 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5341 158142c2 bellard
        }
5342 158142c2 bellard
        return 0;
5343 158142c2 bellard
    }
5344 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5345 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5346 158142c2 bellard
    if ( aSign != bSign ) {
5347 158142c2 bellard
        return
5348 158142c2 bellard
               aSign
5349 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5350 158142c2 bellard
                 != 0 );
5351 158142c2 bellard
    }
5352 158142c2 bellard
    return
5353 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5354 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5355 158142c2 bellard
5356 158142c2 bellard
}
5357 158142c2 bellard
5358 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5359 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
5360 67b7861d Aurelien Jarno
| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
5361 67b7861d Aurelien Jarno
| The comparison is performed according to the IEC/IEEE Standard for Binary
5362 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
5363 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5364 67b7861d Aurelien Jarno
int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5365 67b7861d Aurelien Jarno
{
5366 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5367 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5368 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5369 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5370 67b7861d Aurelien Jarno
       ) {
5371 67b7861d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
5372 67b7861d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
5373 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5374 67b7861d Aurelien Jarno
        }
5375 67b7861d Aurelien Jarno
        return 1;
5376 67b7861d Aurelien Jarno
    }
5377 67b7861d Aurelien Jarno
    return 0;
5378 67b7861d Aurelien Jarno
}
5379 67b7861d Aurelien Jarno
5380 158142c2 bellard
/*----------------------------------------------------------------------------
5381 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5382 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
5383 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5384 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
5385 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
5386 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
5387 158142c2 bellard
| largest integer with the same sign as `a' is returned.
5388 158142c2 bellard
*----------------------------------------------------------------------------*/
5389 158142c2 bellard
5390 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
5391 158142c2 bellard
{
5392 158142c2 bellard
    flag aSign;
5393 158142c2 bellard
    int32 aExp, shiftCount;
5394 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5395 158142c2 bellard
5396 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5397 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5398 158142c2 bellard
    aExp = extractFloat128Exp( a );
5399 158142c2 bellard
    aSign = extractFloat128Sign( a );
5400 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5401 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5402 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5403 158142c2 bellard
    shiftCount = 0x4028 - aExp;
5404 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5405 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
5406 158142c2 bellard
5407 158142c2 bellard
}
5408 158142c2 bellard
5409 158142c2 bellard
/*----------------------------------------------------------------------------
5410 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5411 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
5412 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5413 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
5414 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
5415 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
5416 158142c2 bellard
| returned.
5417 158142c2 bellard
*----------------------------------------------------------------------------*/
5418 158142c2 bellard
5419 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
5420 158142c2 bellard
{
5421 158142c2 bellard
    flag aSign;
5422 158142c2 bellard
    int32 aExp, shiftCount;
5423 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, savedASig;
5424 b3a6a2e0 Peter Maydell
    int32_t z;
5425 158142c2 bellard
5426 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5427 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5428 158142c2 bellard
    aExp = extractFloat128Exp( a );
5429 158142c2 bellard
    aSign = extractFloat128Sign( a );
5430 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5431 158142c2 bellard
    if ( 0x401E < aExp ) {
5432 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5433 158142c2 bellard
        goto invalid;
5434 158142c2 bellard
    }
5435 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
5436 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
5437 158142c2 bellard
        return 0;
5438 158142c2 bellard
    }
5439 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5440 158142c2 bellard
    shiftCount = 0x402F - aExp;
5441 158142c2 bellard
    savedASig = aSig0;
5442 158142c2 bellard
    aSig0 >>= shiftCount;
5443 158142c2 bellard
    z = aSig0;
5444 158142c2 bellard
    if ( aSign ) z = - z;
5445 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
5446 158142c2 bellard
 invalid:
5447 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5448 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5449 158142c2 bellard
    }
5450 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
5451 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5452 158142c2 bellard
    }
5453 158142c2 bellard
    return z;
5454 158142c2 bellard
5455 158142c2 bellard
}
5456 158142c2 bellard
5457 158142c2 bellard
/*----------------------------------------------------------------------------
5458 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5459 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
5460 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5461 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
5462 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
5463 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
5464 158142c2 bellard
| largest integer with the same sign as `a' is returned.
5465 158142c2 bellard
*----------------------------------------------------------------------------*/
5466 158142c2 bellard
5467 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
5468 158142c2 bellard
{
5469 158142c2 bellard
    flag aSign;
5470 158142c2 bellard
    int32 aExp, shiftCount;
5471 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5472 158142c2 bellard
5473 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5474 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5475 158142c2 bellard
    aExp = extractFloat128Exp( a );
5476 158142c2 bellard
    aSign = extractFloat128Sign( a );
5477 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5478 158142c2 bellard
    shiftCount = 0x402F - aExp;
5479 158142c2 bellard
    if ( shiftCount <= 0 ) {
5480 158142c2 bellard
        if ( 0x403E < aExp ) {
5481 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5482 158142c2 bellard
            if (    ! aSign
5483 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
5484 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5485 158142c2 bellard
                    )
5486 158142c2 bellard
               ) {
5487 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
5488 158142c2 bellard
            }
5489 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
5490 158142c2 bellard
        }
5491 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5492 158142c2 bellard
    }
5493 158142c2 bellard
    else {
5494 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5495 158142c2 bellard
    }
5496 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
5497 158142c2 bellard
5498 158142c2 bellard
}
5499 158142c2 bellard
5500 158142c2 bellard
/*----------------------------------------------------------------------------
5501 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5502 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
5503 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5504 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
5505 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
5506 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
5507 158142c2 bellard
| returned.
5508 158142c2 bellard
*----------------------------------------------------------------------------*/
5509 158142c2 bellard
5510 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
5511 158142c2 bellard
{
5512 158142c2 bellard
    flag aSign;
5513 158142c2 bellard
    int32 aExp, shiftCount;
5514 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5515 158142c2 bellard
    int64 z;
5516 158142c2 bellard
5517 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5518 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5519 158142c2 bellard
    aExp = extractFloat128Exp( a );
5520 158142c2 bellard
    aSign = extractFloat128Sign( a );
5521 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5522 158142c2 bellard
    shiftCount = aExp - 0x402F;
5523 158142c2 bellard
    if ( 0 < shiftCount ) {
5524 158142c2 bellard
        if ( 0x403E <= aExp ) {
5525 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5526 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
5527 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5528 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5529 158142c2 bellard
            }
5530 158142c2 bellard
            else {
5531 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5532 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5533 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
5534 158142c2 bellard
                }
5535 158142c2 bellard
            }
5536 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
5537 158142c2 bellard
        }
5538 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
5539 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
5540 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5541 158142c2 bellard
        }
5542 158142c2 bellard
    }
5543 158142c2 bellard
    else {
5544 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5545 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
5546 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
5547 158142c2 bellard
            }
5548 158142c2 bellard
            return 0;
5549 158142c2 bellard
        }
5550 158142c2 bellard
        z = aSig0>>( - shiftCount );
5551 158142c2 bellard
        if (    aSig1
5552 bb98fe42 Andreas Färber
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5553 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5554 158142c2 bellard
        }
5555 158142c2 bellard
    }
5556 158142c2 bellard
    if ( aSign ) z = - z;
5557 158142c2 bellard
    return z;
5558 158142c2 bellard
5559 158142c2 bellard
}
5560 158142c2 bellard
5561 158142c2 bellard
/*----------------------------------------------------------------------------
5562 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5563 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
5564 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5565 158142c2 bellard
| Arithmetic.
5566 158142c2 bellard
*----------------------------------------------------------------------------*/
5567 158142c2 bellard
5568 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
5569 158142c2 bellard
{
5570 158142c2 bellard
    flag aSign;
5571 158142c2 bellard
    int32 aExp;
5572 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5573 bb98fe42 Andreas Färber
    uint32_t zSig;
5574 158142c2 bellard
5575 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5576 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5577 158142c2 bellard
    aExp = extractFloat128Exp( a );
5578 158142c2 bellard
    aSign = extractFloat128Sign( a );
5579 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5580 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5581 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5582 158142c2 bellard
        }
5583 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
5584 158142c2 bellard
    }
5585 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5586 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
5587 158142c2 bellard
    zSig = aSig0;
5588 158142c2 bellard
    if ( aExp || zSig ) {
5589 158142c2 bellard
        zSig |= 0x40000000;
5590 158142c2 bellard
        aExp -= 0x3F81;
5591 158142c2 bellard
    }
5592 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5593 158142c2 bellard
5594 158142c2 bellard
}
5595 158142c2 bellard
5596 158142c2 bellard
/*----------------------------------------------------------------------------
5597 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5598 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
5599 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5600 158142c2 bellard
| Arithmetic.
5601 158142c2 bellard
*----------------------------------------------------------------------------*/
5602 158142c2 bellard
5603 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
5604 158142c2 bellard
{
5605 158142c2 bellard
    flag aSign;
5606 158142c2 bellard
    int32 aExp;
5607 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5608 158142c2 bellard
5609 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5610 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5611 158142c2 bellard
    aExp = extractFloat128Exp( a );
5612 158142c2 bellard
    aSign = extractFloat128Sign( a );
5613 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5614 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5615 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5616 158142c2 bellard
        }
5617 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
5618 158142c2 bellard
    }
5619 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5620 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5621 158142c2 bellard
    if ( aExp || aSig0 ) {
5622 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5623 158142c2 bellard
        aExp -= 0x3C01;
5624 158142c2 bellard
    }
5625 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5626 158142c2 bellard
5627 158142c2 bellard
}
5628 158142c2 bellard
5629 158142c2 bellard
/*----------------------------------------------------------------------------
5630 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5631 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
5632 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
5633 158142c2 bellard
| Floating-Point Arithmetic.
5634 158142c2 bellard
*----------------------------------------------------------------------------*/
5635 158142c2 bellard
5636 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5637 158142c2 bellard
{
5638 158142c2 bellard
    flag aSign;
5639 158142c2 bellard
    int32 aExp;
5640 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5641 158142c2 bellard
5642 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5643 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5644 158142c2 bellard
    aExp = extractFloat128Exp( a );
5645 158142c2 bellard
    aSign = extractFloat128Sign( a );
5646 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5647 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5648 bcd4d9af Christophe Lyon
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5649 158142c2 bellard
        }
5650 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5651 158142c2 bellard
    }
5652 158142c2 bellard
    if ( aExp == 0 ) {
5653 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5654 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5655 158142c2 bellard
    }
5656 158142c2 bellard
    else {
5657 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
5658 158142c2 bellard
    }
5659 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5660 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5661 158142c2 bellard
5662 158142c2 bellard
}
5663 158142c2 bellard
5664 158142c2 bellard
/*----------------------------------------------------------------------------
5665 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
5666 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
5667 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
5668 158142c2 bellard
| Floating-Point Arithmetic.
5669 158142c2 bellard
*----------------------------------------------------------------------------*/
5670 158142c2 bellard
5671 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
5672 158142c2 bellard
{
5673 158142c2 bellard
    flag aSign;
5674 158142c2 bellard
    int32 aExp;
5675 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
5676 158142c2 bellard
    int8 roundingMode;
5677 158142c2 bellard
    float128 z;
5678 158142c2 bellard
5679 158142c2 bellard
    aExp = extractFloat128Exp( a );
5680 158142c2 bellard
    if ( 0x402F <= aExp ) {
5681 158142c2 bellard
        if ( 0x406F <= aExp ) {
5682 158142c2 bellard
            if (    ( aExp == 0x7FFF )
5683 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5684 158142c2 bellard
               ) {
5685 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
5686 158142c2 bellard
            }
5687 158142c2 bellard
            return a;
5688 158142c2 bellard
        }
5689 158142c2 bellard
        lastBitMask = 1;
5690 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5691 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5692 158142c2 bellard
        z = a;
5693 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5694 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5695 158142c2 bellard
            if ( lastBitMask ) {
5696 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5697 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5698 158142c2 bellard
            }
5699 158142c2 bellard
            else {
5700 bb98fe42 Andreas Färber
                if ( (int64_t) z.low < 0 ) {
5701 158142c2 bellard
                    ++z.high;
5702 bb98fe42 Andreas Färber
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5703 158142c2 bellard
                }
5704 158142c2 bellard
            }
5705 158142c2 bellard
        }
5706 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5707 158142c2 bellard
            if (   extractFloat128Sign( z )
5708 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5709 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5710 158142c2 bellard
            }
5711 158142c2 bellard
        }
5712 158142c2 bellard
        z.low &= ~ roundBitsMask;
5713 158142c2 bellard
    }
5714 158142c2 bellard
    else {
5715 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5716 bb98fe42 Andreas Färber
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
5717 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5718 158142c2 bellard
            aSign = extractFloat128Sign( a );
5719 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
5720 158142c2 bellard
             case float_round_nearest_even:
5721 158142c2 bellard
                if (    ( aExp == 0x3FFE )
5722 158142c2 bellard
                     && (   extractFloat128Frac0( a )
5723 158142c2 bellard
                          | extractFloat128Frac1( a ) )
5724 158142c2 bellard
                   ) {
5725 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
5726 158142c2 bellard
                }
5727 158142c2 bellard
                break;
5728 158142c2 bellard
             case float_round_down:
5729 158142c2 bellard
                return
5730 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5731 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
5732 158142c2 bellard
             case float_round_up:
5733 158142c2 bellard
                return
5734 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
5735 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
5736 158142c2 bellard
            }
5737 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
5738 158142c2 bellard
        }
5739 158142c2 bellard
        lastBitMask = 1;
5740 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
5741 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5742 158142c2 bellard
        z.low = 0;
5743 158142c2 bellard
        z.high = a.high;
5744 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5745 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5746 158142c2 bellard
            z.high += lastBitMask>>1;
5747 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5748 158142c2 bellard
                z.high &= ~ lastBitMask;
5749 158142c2 bellard
            }
5750 158142c2 bellard
        }
5751 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5752 158142c2 bellard
            if (   extractFloat128Sign( z )
5753 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5754 158142c2 bellard
                z.high |= ( a.low != 0 );
5755 158142c2 bellard
                z.high += roundBitsMask;
5756 158142c2 bellard
            }
5757 158142c2 bellard
        }
5758 158142c2 bellard
        z.high &= ~ roundBitsMask;
5759 158142c2 bellard
    }
5760 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5761 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5762 158142c2 bellard
    }
5763 158142c2 bellard
    return z;
5764 158142c2 bellard
5765 158142c2 bellard
}
5766 158142c2 bellard
5767 158142c2 bellard
/*----------------------------------------------------------------------------
5768 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
5769 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
5770 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
5771 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
5772 158142c2 bellard
| Floating-Point Arithmetic.
5773 158142c2 bellard
*----------------------------------------------------------------------------*/
5774 158142c2 bellard
5775 158142c2 bellard
/* Implementation notes: each fraction is held in two 64-bit words (xSig0 is
 * the more significant word, xSig1 the less significant one).  The implicit
 * leading bit, when present, is bit 48 of the high word
 * (0x0001000000000000).  zSig2 receives the bits shifted out below the low
 * word and is passed on to roundAndPackFloat128() together with the sum. */
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5776 158142c2 bellard
{
5777 158142c2 bellard
    int32 aExp, bExp, zExp;
5778 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5779 158142c2 bellard
    int32 expDiff;
5780 158142c2 bellard
5781 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5782 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5783 158142c2 bellard
    aExp = extractFloat128Exp( a );
5784 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5785 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5786 158142c2 bellard
    bExp = extractFloat128Exp( b );
5787 158142c2 bellard
    expDiff = aExp - bExp;
5788 158142c2 bellard
    /* Case 1: a has the larger exponent; b is aligned to it. */
    if ( 0 < expDiff ) {
5789 158142c2 bellard
        /* Exponent 0x7FFF: NaN when the fraction is non-zero, else infinity. */
        if ( aExp == 0x7FFF ) {
5790 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5791 158142c2 bellard
            return a;
5792 158142c2 bellard
        }
5793 158142c2 bellard
        /* b has exponent field 0 (zero or subnormal): no implicit bit is
         * added and the alignment shift is one less. */
        if ( bExp == 0 ) {
5794 158142c2 bellard
            --expDiff;
5795 158142c2 bellard
        }
5796 158142c2 bellard
        else {
5797 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
5798 158142c2 bellard
        }
5799 158142c2 bellard
        /* Shift b right by expDiff; the bits shifted out land in zSig2. */
        shift128ExtraRightJamming(
5800 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5801 158142c2 bellard
        zExp = aExp;
5802 158142c2 bellard
    }
5803 158142c2 bellard
    /* Case 2: b has the larger exponent; mirror image of case 1. */
    else if ( expDiff < 0 ) {
5804 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5805 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5806 158142c2 bellard
            /* b is infinite: the result is infinity carrying zSign. */
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5807 158142c2 bellard
        }
5808 158142c2 bellard
        if ( aExp == 0 ) {
5809 158142c2 bellard
            ++expDiff;
5810 158142c2 bellard
        }
5811 158142c2 bellard
        else {
5812 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
5813 158142c2 bellard
        }
5814 158142c2 bellard
        shift128ExtraRightJamming(
5815 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5816 158142c2 bellard
        zExp = bExp;
5817 158142c2 bellard
    }
5818 158142c2 bellard
    /* Case 3: equal exponents; no alignment shift is needed. */
    else {
5819 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5820 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5821 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
5822 158142c2 bellard
            }
5823 158142c2 bellard
            return a;
5824 158142c2 bellard
        }
5825 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5826 fe76d976 pbrook
        /* Both operands have exponent field 0: the raw fraction sum is
         * packed directly with exponent field 0, unless flush-to-zero is
         * enabled. */
        if ( aExp == 0 ) {
5827 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
5828 e6afc87f Peter Maydell
                /* A non-zero sum is flushed to a signed zero and reported
                 * through the output_denormal flag. */
                if (zSig0 | zSig1) {
5829 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
5830 e6afc87f Peter Maydell
                }
5831 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
5832 e6afc87f Peter Maydell
            }
5833 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
5834 fe76d976 pbrook
        }
5835 158142c2 bellard
        zSig2 = 0;
5836 158142c2 bellard
        /* The two implicit bits (bit 48 each) sum to bit 49. */
        zSig0 |= LIT64( 0x0002000000000000 );
5837 158142c2 bellard
        zExp = aExp;
5838 158142c2 bellard
        goto shiftRight1;
5839 158142c2 bellard
    }
5840 158142c2 bellard
    /* Cases 1 and 2 join here.  Only the smaller-exponent operand received
     * its implicit bit above; this OR supplies the one for the larger
     * operand.  Because the words are simply added next, setting it in
     * aSig0 serves either ordering.
     * NOTE(review): in case 2 this relies on bit 48 of aSig0 being clear
     * after the right shift -- confirm against shift128ExtraRightJamming. */
    aSig0 |= LIT64( 0x0001000000000000 );
5841 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5842 158142c2 bellard
    /* If the sum stayed below bit 49 it is rounded as is, with zExp one
     * lower; otherwise zExp is restored and the sum is shifted right by one.
     * NOTE(review): the exponent bias expected by roundAndPackFloat128 is
     * defined elsewhere in the file. */
    --zExp;
5843 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5844 158142c2 bellard
    ++zExp;
5845 158142c2 bellard
 shiftRight1:
5846 158142c2 bellard
    shift128ExtraRightJamming(
5847 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5848 158142c2 bellard
 roundAndPack:
5849 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5850 158142c2 bellard
5851 158142c2 bellard
}
5852 158142c2 bellard
5853 158142c2 bellard
/*----------------------------------------------------------------------------
5854 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
5855 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
5856 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
5857 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5858 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5859 158142c2 bellard
*----------------------------------------------------------------------------*/
5860 158142c2 bellard
5861 158142c2 bellard
/* Implementation notes: both fractions are first shifted left by 14 bits, so
 * the implicit leading bit becomes bit 62 of the high word
 * (0x4000000000000000) and the exponent handed to
 * normalizeRoundAndPackFloat128() is lowered by 14 at the end.  The labels
 * aExpBigger/bExpBigger align the smaller operand and then fall through into
 * aBigger/bBigger, which perform the actual subtraction. */
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5862 158142c2 bellard
{
5863 158142c2 bellard
    int32 aExp, bExp, zExp;
5864 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5865 158142c2 bellard
    int32 expDiff;
5866 158142c2 bellard
    float128 z;
5867 158142c2 bellard
5868 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5869 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5870 158142c2 bellard
    aExp = extractFloat128Exp( a );
5871 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5872 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5873 158142c2 bellard
    bExp = extractFloat128Exp( b );
5874 158142c2 bellard
    expDiff = aExp - bExp;
5875 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5876 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5877 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
5878 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
5879 158142c2 bellard
    /* Equal exponents from here down to the bExpBigger label. */
    if ( aExp == 0x7FFF ) {
5880 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5881 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5882 158142c2 bellard
        }
5883 158142c2 bellard
        /* Infinity minus infinity: invalid operation, default NaN. */
        float_raise( float_flag_invalid STATUS_VAR);
5884 158142c2 bellard
        z.low = float128_default_nan_low;
5885 158142c2 bellard
        z.high = float128_default_nan_high;
5886 158142c2 bellard
        return z;
5887 158142c2 bellard
    }
5888 158142c2 bellard
    /* Both exponent fields are 0: use exponent 1 for the result. */
    if ( aExp == 0 ) {
5889 158142c2 bellard
        aExp = 1;
5890 158142c2 bellard
        bExp = 1;
5891 158142c2 bellard
    }
5892 158142c2 bellard
    /* Compare the fractions (high word first) to find the larger magnitude.
     * No implicit bits are set on this path; with equal exponents they
     * would cancel in the subtraction anyway. */
    if ( bSig0 < aSig0 ) goto aBigger;
5893 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
5894 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
5895 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
5896 158142c2 bellard
    /* Exact cancellation: the zero is negative only in round-down mode. */
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5897 158142c2 bellard
 bExpBigger:
5898 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5899 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5900 158142c2 bellard
        /* b is infinite: the result is infinity with the sign flipped. */
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5901 158142c2 bellard
    }
5902 158142c2 bellard
    /* a has exponent field 0: no implicit bit, and one less shift. */
    if ( aExp == 0 ) {
5903 158142c2 bellard
        ++expDiff;
5904 158142c2 bellard
    }
5905 158142c2 bellard
    else {
5906 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5907 158142c2 bellard
    }
5908 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5909 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
5910 158142c2 bellard
 bBigger:
5911 158142c2 bellard
    /* |b| > |a|: compute b - a and invert the result sign. */
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5912 158142c2 bellard
    zExp = bExp;
5913 158142c2 bellard
    zSign ^= 1;
5914 158142c2 bellard
    goto normalizeRoundAndPack;
5915 158142c2 bellard
 aExpBigger:
5916 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5917 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5918 158142c2 bellard
        return a;
5919 158142c2 bellard
    }
5920 158142c2 bellard
    /* b has exponent field 0: no implicit bit, and one less shift. */
    if ( bExp == 0 ) {
5921 158142c2 bellard
        --expDiff;
5922 158142c2 bellard
    }
5923 158142c2 bellard
    else {
5924 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
5925 158142c2 bellard
    }
5926 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5927 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
5928 158142c2 bellard
 aBigger:
5929 158142c2 bellard
    /* |a| > |b|: compute a - b; zSign is kept. */
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5930 158142c2 bellard
    zExp = aExp;
5931 158142c2 bellard
 normalizeRoundAndPack:
5932 158142c2 bellard
    /* NOTE(review): the -1 and -14 adjustments presumably match the input
     * convention of normalizeRoundAndPackFloat128 and the 14-bit pre-shift
     * applied above -- confirm against that helper. */
    --zExp;
5933 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5934 158142c2 bellard
5935 158142c2 bellard
}
5936 158142c2 bellard
5937 158142c2 bellard
/*----------------------------------------------------------------------------
5938 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
5939 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
5940 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5941 158142c2 bellard
*----------------------------------------------------------------------------*/
5942 158142c2 bellard
5943 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
{
    /* Operands of opposite sign are handled as a magnitude subtraction;
     * operands of the same sign as a magnitude addition.  In both cases the
     * sign of `a' is the sign passed to the helper. */
    const flag signA = extractFloat128Sign( a );
    const flag signB = extractFloat128Sign( b );

    if ( signA != signB ) {
        return subFloat128Sigs( a, b, signA STATUS_VAR );
    }
    return addFloat128Sigs( a, b, signA STATUS_VAR );
}
5957 158142c2 bellard
5958 158142c2 bellard
/*----------------------------------------------------------------------------
5959 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
5960 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5961 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5962 158142c2 bellard
*----------------------------------------------------------------------------*/
5963 158142c2 bellard
5964 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
{
    /* Subtracting an operand of opposite sign is a magnitude addition;
     * subtracting one of the same sign is a magnitude subtraction.  In both
     * cases the sign of `a' is the sign passed to the helper. */
    const flag signA = extractFloat128Sign( a );
    const flag signB = extractFloat128Sign( b );

    if ( signA != signB ) {
        return addFloat128Sigs( a, b, signA STATUS_VAR );
    }
    return subFloat128Sigs( a, b, signA STATUS_VAR );
}
5978 158142c2 bellard
5979 158142c2 bellard
/*----------------------------------------------------------------------------
5980 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
5981 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5982 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5983 158142c2 bellard
*----------------------------------------------------------------------------*/
5984 158142c2 bellard
5985 158142c2 bellard
/* Implementation notes: the result sign is the XOR of the operand signs.
 * NaN, infinity and zero operands are resolved first; subnormal operands are
 * normalized; the fractions are then multiplied into a 256-bit product whose
 * upper 128 bits (zSig0:zSig1) plus a sticky word (zSig2) are rounded. */
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5986 158142c2 bellard
{
5987 158142c2 bellard
    flag aSign, bSign, zSign;
5988 158142c2 bellard
    int32 aExp, bExp, zExp;
5989 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5990 158142c2 bellard
    float128 z;
5991 158142c2 bellard
5992 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5993 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5994 158142c2 bellard
    aExp = extractFloat128Exp( a );
5995 158142c2 bellard
    aSign = extractFloat128Sign( a );
5996 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5997 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5998 158142c2 bellard
    bExp = extractFloat128Exp( b );
5999 158142c2 bellard
    bSign = extractFloat128Sign( b );
6000 158142c2 bellard
    zSign = aSign ^ bSign;
6001 158142c2 bellard
    /* a is NaN or infinity. */
    if ( aExp == 0x7FFF ) {
6002 158142c2 bellard
        if (    ( aSig0 | aSig1 )
6003 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6004 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
6005 158142c2 bellard
        }
6006 158142c2 bellard
        /* Infinity times zero is an invalid operation. */
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6007 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
6008 158142c2 bellard
    }
6009 158142c2 bellard
    /* b is NaN or infinity (a is finite here). */
    if ( bExp == 0x7FFF ) {
6010 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6011 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6012 158142c2 bellard
 invalid:
6013 158142c2 bellard
            /* Also reached by the goto above: raise invalid and return the
             * default NaN. */
            float_raise( float_flag_invalid STATUS_VAR);
6014 158142c2 bellard
            z.low = float128_default_nan_low;
6015 158142c2 bellard
            z.high = float128_default_nan_high;
6016 158142c2 bellard
            return z;
6017 158142c2 bellard
        }
6018 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
6019 158142c2 bellard
    }
6020 158142c2 bellard
    /* Exponent field 0: a zero operand yields a signed zero; a subnormal
     * operand is normalized before multiplying. */
    if ( aExp == 0 ) {
6021 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6022 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6023 158142c2 bellard
    }
6024 158142c2 bellard
    if ( bExp == 0 ) {
6025 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6026 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6027 158142c2 bellard
    }
6028 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
6029 158142c2 bellard
    /* a gets its implicit bit (bit 48); b is shifted left by 16 without
     * one, since that bit would fall just above the top of bSig0. */
    aSig0 |= LIT64( 0x0001000000000000 );
6030 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6031 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6032 158142c2 bellard
    /* Adding a to the upper 128 bits apparently accounts for the product of
     * a with b's omitted implicit bit. */
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6033 158142c2 bellard
    /* Fold the lowest product word into zSig2 as a sticky bit. */
    zSig2 |= ( zSig3 != 0 );
6034 158142c2 bellard
    /* The product reached bit 49: shift right by one and bump zExp. */
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6035 158142c2 bellard
        shift128ExtraRightJamming(
6036 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6037 158142c2 bellard
        ++zExp;
6038 158142c2 bellard
    }
6039 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6040 158142c2 bellard
6041 158142c2 bellard
}
6042 158142c2 bellard
6043 158142c2 bellard
/*----------------------------------------------------------------------------
6044 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
6045 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
6046 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6047 158142c2 bellard
*----------------------------------------------------------------------------*/
6048 158142c2 bellard
6049 158142c2 bellard
/* Implementation notes: the result sign is the XOR of the operand signs.
 * After the special cases (NaN, infinity, zero) the quotient is built as two
 * 64-bit digits, zSig0 and zSig1.  Each digit is first estimated with
 * estimateDiv128To64() and then corrected downwards while the corresponding
 * remainder is negative. */
float128 float128_div( float128 a, float128 b STATUS_PARAM )
6050 158142c2 bellard
{
6051 158142c2 bellard
    flag aSign, bSign, zSign;
6052 158142c2 bellard
    int32 aExp, bExp, zExp;
6053 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6054 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6055 158142c2 bellard
    float128 z;
6056 158142c2 bellard
6057 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
6058 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
6059 158142c2 bellard
    aExp = extractFloat128Exp( a );
6060 158142c2 bellard
    aSign = extractFloat128Sign( a );
6061 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
6062 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
6063 158142c2 bellard
    bExp = extractFloat128Exp( b );
6064 158142c2 bellard
    bSign = extractFloat128Sign( b );
6065 158142c2 bellard
    zSign = aSign ^ bSign;
6066 158142c2 bellard
    /* a is NaN or infinity. */
    if ( aExp == 0x7FFF ) {
6067 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6068 158142c2 bellard
        if ( bExp == 0x7FFF ) {
6069 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6070 158142c2 bellard
            /* Infinity divided by infinity is an invalid operation. */
            goto invalid;
6071 158142c2 bellard
        }
6072 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
6073 158142c2 bellard
    }
6074 158142c2 bellard
    /* b is NaN or infinity (a is finite here): finite / infinity is a
     * signed zero. */
    if ( bExp == 0x7FFF ) {
6075 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6076 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
6077 158142c2 bellard
    }
6078 158142c2 bellard
    if ( bExp == 0 ) {
6079 158142c2 bellard
        /* Divisor is zero. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
6080 158142c2 bellard
            /* 0 / 0: invalid operation, default NaN (also the target of the
             * infinity / infinity goto above). */
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6081 158142c2 bellard
 invalid:
6082 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
6083 158142c2 bellard
                z.low = float128_default_nan_low;
6084 158142c2 bellard
                z.high = float128_default_nan_high;
6085 158142c2 bellard
                return z;
6086 158142c2 bellard
            }
6087 158142c2 bellard
            /* Non-zero / 0: raise divbyzero and return a signed infinity. */
            float_raise( float_flag_divbyzero STATUS_VAR);
6088 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
6089 158142c2 bellard
        }
6090 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6091 158142c2 bellard
    }
6092 158142c2 bellard
    if ( aExp == 0 ) {
6093 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6094 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6095 158142c2 bellard
    }
6096 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
6097 158142c2 bellard
    /* Set the implicit bits and shift both significands left by 15. */
    shortShift128Left(
6098 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6099 158142c2 bellard
    shortShift128Left(
6100 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6101 158142c2 bellard
    /* When the le128 test (b <= a, per its name) holds, halve a and bump
     * zExp; presumably this keeps the dividend below the divisor for the
     * digit estimation -- TODO confirm. */
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6102 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6103 158142c2 bellard
        ++zExp;
6104 158142c2 bellard
    }
6105 158142c2 bellard
    /* First quotient digit: estimate, form the remainder, correct. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6106 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6107 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6108 bb98fe42 Andreas Färber
    /* A negative remainder means the estimate was too large. */
    while ( (int64_t) rem0 < 0 ) {
6109 158142c2 bellard
        --zSig0;
6110 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6111 158142c2 bellard
    }
6112 158142c2 bellard
    /* Second quotient digit.  The exact remainder is computed only when the
     * low 14 bits of the estimate are at most 4; the lowest bit of zSig1
     * then records whether any remainder is left.
     * NOTE(review): presumably the estimate error cannot change the rounded
     * result otherwise -- confirm against estimateDiv128To64's error bound. */
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6113 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6114 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6115 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6116 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
6117 158142c2 bellard
            --zSig1;
6118 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6119 158142c2 bellard
        }
6120 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6121 158142c2 bellard
    }
6122 158142c2 bellard
    /* Shift the 128-bit quotient right by 15; the bits shifted out go to
     * zSig2 for rounding. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6123 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6124 158142c2 bellard
6125 158142c2 bellard
}
6126 158142c2 bellard
6127 158142c2 bellard
/*----------------------------------------------------------------------------
6128 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
6129 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
6130 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6131 158142c2 bellard
*----------------------------------------------------------------------------*/
6132 158142c2 bellard
6133 158142c2 bellard
/* Implementation notes: after the special cases, the dividend significand is
 * reduced against the divisor in chunks using estimated partial quotients
 * (q).  A final subtract loop then produces two candidate remainders, one
 * non-negative and one negative, and the one of smaller magnitude is
 * returned; on a tie the parity of q decides.  The sign of the result is
 * derived from the sign of `a' and the sign of the chosen remainder. */
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
6134 158142c2 bellard
{
6135 ed086f3d Blue Swirl
    flag aSign, zSign;
6136 158142c2 bellard
    int32 aExp, bExp, expDiff;
6137 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6138 bb98fe42 Andreas Färber
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6139 bb98fe42 Andreas Färber
    int64_t sigMean0;
6140 158142c2 bellard
    float128 z;
6141 158142c2 bellard
6142 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
6143 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
6144 158142c2 bellard
    aExp = extractFloat128Exp( a );
6145 158142c2 bellard
    aSign = extractFloat128Sign( a );
6146 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
6147 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
6148 158142c2 bellard
    bExp = extractFloat128Exp( b );
6149 158142c2 bellard
    /* a is NaN or infinity: propagate a NaN operand, otherwise the
     * remainder of an infinite dividend is an invalid operation. */
    if ( aExp == 0x7FFF ) {
6150 158142c2 bellard
        if (    ( aSig0 | aSig1 )
6151 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6152 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
6153 158142c2 bellard
        }
6154 158142c2 bellard
        goto invalid;
6155 158142c2 bellard
    }
6156 158142c2 bellard
    /* b is NaN or infinity: with an infinite divisor, a is returned as is. */
    if ( bExp == 0x7FFF ) {
6157 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6158 158142c2 bellard
        return a;
6159 158142c2 bellard
    }
6160 158142c2 bellard
    if ( bExp == 0 ) {
6161 158142c2 bellard
        /* Divisor is zero: invalid operation, default NaN. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
6162 158142c2 bellard
 invalid:
6163 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6164 158142c2 bellard
            z.low = float128_default_nan_low;
6165 158142c2 bellard
            z.high = float128_default_nan_high;
6166 158142c2 bellard
            return z;
6167 158142c2 bellard
        }
6168 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6169 158142c2 bellard
    }
6170 158142c2 bellard
    if ( aExp == 0 ) {
6171 158142c2 bellard
        /* A zero dividend is returned unchanged. */
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
6172 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6173 158142c2 bellard
    }
6174 158142c2 bellard
    expDiff = aExp - bExp;
6175 158142c2 bellard
    /* a's exponent is at least two below b's: a is already the remainder. */
    if ( expDiff < -1 ) return a;
6176 158142c2 bellard
    /* Set the implicit bits and left-justify: b by 15 bits, a by 15 bits, or
     * by 14 when expDiff is -1. */
    shortShift128Left(
6177 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
6178 158142c2 bellard
        aSig1,
6179 158142c2 bellard
        15 - ( expDiff < 0 ),
6180 158142c2 bellard
        &aSig0,
6181 158142c2 bellard
        &aSig1
6182 158142c2 bellard
    );
6183 158142c2 bellard
    shortShift128Left(
6184 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6185 158142c2 bellard
    /* q starts as 1 when the le128 test (b <= a, per its name) holds, and b
     * is then subtracted once. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
6186 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6187 158142c2 bellard
    expDiff -= 64;
6188 158142c2 bellard
    /* Main reduction loop: each pass lowers expDiff by 61.  The estimate is
     * reduced by 4 (clamped at 0).
     * NOTE(review): presumably so the partial quotient never overshoots --
     * confirm against estimateDiv128To64. */
    while ( 0 < expDiff ) {
6189 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6190 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
6191 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6192 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6193 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6194 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6195 158142c2 bellard
        expDiff -= 61;
6196 158142c2 bellard
    }
6197 158142c2 bellard
    /* Final partial step: the estimate is scaled down by -expDiff bits, and
     * both significands are moved 12 bits down from their left-justified
     * position. */
    if ( -64 < expDiff ) {
6198 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6199 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
6200 158142c2 bellard
        q >>= - expDiff;
6201 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6202 158142c2 bellard
        expDiff += 52;
6203 158142c2 bellard
        if ( expDiff < 0 ) {
6204 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6205 158142c2 bellard
        }
6206 158142c2 bellard
        else {
6207 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6208 158142c2 bellard
        }
6209 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6210 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6211 158142c2 bellard
    }
6212 158142c2 bellard
    else {
6213 158142c2 bellard
        /* No estimation step: only apply the same 12-bit right shift. */
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6214 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6215 158142c2 bellard
    }
6216 158142c2 bellard
    /* Subtract b until the remainder turns negative.  On exit aSig holds the
     * negative remainder and alternateASig the last non-negative one; q
     * counts the subtractions. */
    do {
6217 158142c2 bellard
        alternateASig0 = aSig0;
6218 158142c2 bellard
        alternateASig1 = aSig1;
6219 158142c2 bellard
        ++q;
6220 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6221 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig0 );
6222 158142c2 bellard
    /* The sign of the sum of the two candidates tells which one is smaller
     * in magnitude.  The high word is written through a cast into the signed
     * sigMean0 so it can be tested with "< 0" below. */
    add128(
6223 bb98fe42 Andreas Färber
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6224 158142c2 bellard
    /* Take the non-negative candidate when the negative one is larger in
     * magnitude, or on an exact tie when q is odd. */
    if (    ( sigMean0 < 0 )
6225 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6226 158142c2 bellard
        aSig0 = alternateASig0;
6227 158142c2 bellard
        aSig1 = alternateASig1;
6228 158142c2 bellard
    }
6229 bb98fe42 Andreas Färber
    /* Convert a negative remainder to sign-and-magnitude form. */
    zSign = ( (int64_t) aSig0 < 0 );
6230 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6231 158142c2 bellard
    return
6232 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
6233 158142c2 bellard
6234 158142c2 bellard
}
6235 158142c2 bellard
6236 158142c2 bellard
/*----------------------------------------------------------------------------
6237 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
6238 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
6239 158142c2 bellard
| Floating-Point Arithmetic.
6240 158142c2 bellard
*----------------------------------------------------------------------------*/
6241 158142c2 bellard
6242 158142c2 bellard
/* Implementation notes: NaN, infinity, negative and zero inputs are resolved
 * first.  The root is then built as two 64-bit digits, zSig0 and zSig1.
 * Each digit starts from an estimate and is corrected downwards while the
 * corresponding remainder is negative.  The result is always packed with
 * sign 0. */
float128 float128_sqrt( float128 a STATUS_PARAM )
6243 158142c2 bellard
{
6244 158142c2 bellard
    flag aSign;
6245 158142c2 bellard
    int32 aExp, zExp;
6246 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6247 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6248 158142c2 bellard
    float128 z;
6249 158142c2 bellard
6250 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
6251 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
6252 158142c2 bellard
    aExp = extractFloat128Exp( a );
6253 158142c2 bellard
    aSign = extractFloat128Sign( a );
6254 158142c2 bellard
    /* NaN or infinity: a NaN is propagated (the single operand is passed
     * twice), +infinity is returned unchanged, -infinity is invalid. */
    if ( aExp == 0x7FFF ) {
6255 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
6256 158142c2 bellard
        if ( ! aSign ) return a;
6257 158142c2 bellard
        goto invalid;
6258 158142c2 bellard
    }
6259 158142c2 bellard
    /* Negative input: -0 is returned unchanged; anything else is an invalid
     * operation yielding the default NaN. */
    if ( aSign ) {
6260 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6261 158142c2 bellard
 invalid:
6262 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6263 158142c2 bellard
        z.low = float128_default_nan_low;
6264 158142c2 bellard
        z.high = float128_default_nan_high;
6265 158142c2 bellard
        return z;
6266 158142c2 bellard
    }
6267 158142c2 bellard
    /* +0 yields +0; a subnormal input is normalized first. */
    if ( aExp == 0 ) {
6268 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6269 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6270 158142c2 bellard
    }
6271 158142c2 bellard
    /* Halve the unbiased exponent (bias 0x3FFF). */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6272 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
6273 158142c2 bellard
    /* Initial estimate from estimateSqrt32(), then a division-based
     * refinement; the pre-shift of a depends on the parity of aExp.
     * NOTE(review): precision of these estimates is defined by the helpers,
     * which are not visible here. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6274 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6275 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6276 158142c2 bellard
    doubleZSig0 = zSig0<<1;
6277 158142c2 bellard
    /* rem = a - zSig0^2; while negative, step zSig0 down and add back
     * 2*zSig0 + 1. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
6278 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6279 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
6280 158142c2 bellard
        --zSig0;
6281 158142c2 bellard
        doubleZSig0 -= 2;
6282 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6283 158142c2 bellard
    }
6284 158142c2 bellard
    /* Second digit.  The exact remainder is computed only when the low 13
     * bits of the estimate are at most 5; the lowest bit of zSig1 then
     * records whether any remainder is left.
     * NOTE(review): presumably the estimate error cannot affect rounding
     * otherwise -- confirm against estimateDiv128To64's error bound. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6285 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6286 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
6287 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6288 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6289 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
6290 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6291 bb98fe42 Andreas Färber
        /* While the remainder is negative, step zSig1 down and add back
         * the term built from doubleZSig0 and 2*zSig1 + 1. */
        while ( (int64_t) rem1 < 0 ) {
6292 158142c2 bellard
            --zSig1;
6293 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6294 158142c2 bellard
            term3 |= 1;
6295 158142c2 bellard
            term2 |= doubleZSig0;
6296 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6297 158142c2 bellard
        }
6298 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6299 158142c2 bellard
    }
6300 158142c2 bellard
    /* Shift the 128-bit root right by 14; the bits shifted out go to zSig2
     * for rounding. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6301 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6302 158142c2 bellard
6303 158142c2 bellard
}
6304 158142c2 bellard
6305 158142c2 bellard
/*----------------------------------------------------------------------------
6306 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
6307 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
6308 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
6309 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6310 158142c2 bellard
*----------------------------------------------------------------------------*/
6311 158142c2 bellard
6312 b689362d Aurelien Jarno
int float128_eq( float128 a, float128 b STATUS_PARAM )
6313 158142c2 bellard
{
6314 158142c2 bellard
6315 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6316 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6317 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6318 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6319 158142c2 bellard
       ) {
6320 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
6321 158142c2 bellard
        return 0;
6322 158142c2 bellard
    }
6323 158142c2 bellard
    return
6324 158142c2 bellard
           ( a.low == b.low )
6325 158142c2 bellard
        && (    ( a.high == b.high )
6326 158142c2 bellard
             || (    ( a.low == 0 )
6327 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6328 158142c2 bellard
           );
6329 158142c2 bellard
6330 158142c2 bellard
}
6331 158142c2 bellard
6332 158142c2 bellard
/*----------------------------------------------------------------------------
6333 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6334 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
6335 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
6336 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6337 158142c2 bellard
*----------------------------------------------------------------------------*/
6338 158142c2 bellard
6339 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
6340 158142c2 bellard
{
6341 158142c2 bellard
    flag aSign, bSign;
6342 158142c2 bellard
6343 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6344 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6345 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6346 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6347 158142c2 bellard
       ) {
6348 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6349 158142c2 bellard
        return 0;
6350 158142c2 bellard
    }
6351 158142c2 bellard
    aSign = extractFloat128Sign( a );
6352 158142c2 bellard
    bSign = extractFloat128Sign( b );
6353 158142c2 bellard
    if ( aSign != bSign ) {
6354 158142c2 bellard
        return
6355 158142c2 bellard
               aSign
6356 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6357 158142c2 bellard
                 == 0 );
6358 158142c2 bellard
    }
6359 158142c2 bellard
    return
6360 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
6361 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
6362 158142c2 bellard
6363 158142c2 bellard
}
6364 158142c2 bellard
6365 158142c2 bellard
/*----------------------------------------------------------------------------
6366 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6367 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
6368 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
6369 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6370 158142c2 bellard
*----------------------------------------------------------------------------*/
6371 158142c2 bellard
6372 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
6373 158142c2 bellard
{
6374 158142c2 bellard
    flag aSign, bSign;
6375 158142c2 bellard
6376 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6377 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6378 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6379 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6380 158142c2 bellard
       ) {
6381 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6382 158142c2 bellard
        return 0;
6383 158142c2 bellard
    }
6384 158142c2 bellard
    aSign = extractFloat128Sign( a );
6385 158142c2 bellard
    bSign = extractFloat128Sign( b );
6386 158142c2 bellard
    if ( aSign != bSign ) {
6387 158142c2 bellard
        return
6388 158142c2 bellard
               aSign
6389 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6390 158142c2 bellard
                 != 0 );
6391 158142c2 bellard
    }
6392 158142c2 bellard
    return
6393 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
6394 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
6395 158142c2 bellard
6396 158142c2 bellard
}
6397 158142c2 bellard
6398 158142c2 bellard
/*----------------------------------------------------------------------------
6399 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6400 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
6401 f5a64251 Aurelien Jarno
| operand is a NaN. The comparison is performed according to the IEC/IEEE
6402 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
6403 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
6404 67b7861d Aurelien Jarno
6405 67b7861d Aurelien Jarno
int float128_unordered( float128 a, float128 b STATUS_PARAM )
6406 67b7861d Aurelien Jarno
{
6407 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6408 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6409 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6410 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6411 67b7861d Aurelien Jarno
       ) {
6412 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
6413 67b7861d Aurelien Jarno
        return 1;
6414 67b7861d Aurelien Jarno
    }
6415 67b7861d Aurelien Jarno
    return 0;
6416 67b7861d Aurelien Jarno
}
6417 67b7861d Aurelien Jarno
6418 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
6419 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
6420 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
6421 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
6422 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
6423 158142c2 bellard
*----------------------------------------------------------------------------*/
6424 158142c2 bellard
6425 b689362d Aurelien Jarno
int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
6426 158142c2 bellard
{
6427 158142c2 bellard
6428 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6429 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6430 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6431 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6432 158142c2 bellard
       ) {
6433 b689362d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
6434 b689362d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
6435 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6436 b689362d Aurelien Jarno
        }
6437 158142c2 bellard
        return 0;
6438 158142c2 bellard
    }
6439 158142c2 bellard
    return
6440 158142c2 bellard
           ( a.low == b.low )
6441 158142c2 bellard
        && (    ( a.high == b.high )
6442 158142c2 bellard
             || (    ( a.low == 0 )
6443 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6444 158142c2 bellard
           );
6445 158142c2 bellard
6446 158142c2 bellard
}
6447 158142c2 bellard
6448 158142c2 bellard
/*----------------------------------------------------------------------------
6449 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6450 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
6451 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
6452 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6453 158142c2 bellard
*----------------------------------------------------------------------------*/
6454 158142c2 bellard
6455 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
6456 158142c2 bellard
{
6457 158142c2 bellard
    flag aSign, bSign;
6458 158142c2 bellard
6459 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6460 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6461 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6462 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6463 158142c2 bellard
       ) {
6464 158142c2 bellard
        if (    float128_is_signaling_nan( a )
6465 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
6466 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6467 158142c2 bellard
        }
6468 158142c2 bellard
        return 0;
6469 158142c2 bellard
    }
6470 158142c2 bellard
    aSign = extractFloat128Sign( a );
6471 158142c2 bellard
    bSign = extractFloat128Sign( b );
6472 158142c2 bellard
    if ( aSign != bSign ) {
6473 158142c2 bellard
        return
6474 158142c2 bellard
               aSign
6475 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6476 158142c2 bellard
                 == 0 );
6477 158142c2 bellard
    }
6478 158142c2 bellard
    return
6479 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
6480 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
6481 158142c2 bellard
6482 158142c2 bellard
}
6483 158142c2 bellard
6484 158142c2 bellard
/*----------------------------------------------------------------------------
6485 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6486 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
6487 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
6488 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
6489 158142c2 bellard
*----------------------------------------------------------------------------*/
6490 158142c2 bellard
6491 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
6492 158142c2 bellard
{
6493 158142c2 bellard
    flag aSign, bSign;
6494 158142c2 bellard
6495 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6496 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6497 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6498 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6499 158142c2 bellard
       ) {
6500 158142c2 bellard
        if (    float128_is_signaling_nan( a )
6501 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
6502 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6503 158142c2 bellard
        }
6504 158142c2 bellard
        return 0;
6505 158142c2 bellard
    }
6506 158142c2 bellard
    aSign = extractFloat128Sign( a );
6507 158142c2 bellard
    bSign = extractFloat128Sign( b );
6508 158142c2 bellard
    if ( aSign != bSign ) {
6509 158142c2 bellard
        return
6510 158142c2 bellard
               aSign
6511 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6512 158142c2 bellard
                 != 0 );
6513 158142c2 bellard
    }
6514 158142c2 bellard
    return
6515 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
6516 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
6517 158142c2 bellard
6518 158142c2 bellard
}
6519 158142c2 bellard
6520 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
6521 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6522 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
6523 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
6524 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
6525 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
6526 67b7861d Aurelien Jarno
6527 67b7861d Aurelien Jarno
int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6528 67b7861d Aurelien Jarno
{
6529 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6530 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6531 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6532 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6533 67b7861d Aurelien Jarno
       ) {
6534 67b7861d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
6535 67b7861d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
6536 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6537 67b7861d Aurelien Jarno
        }
6538 67b7861d Aurelien Jarno
        return 1;
6539 67b7861d Aurelien Jarno
    }
6540 67b7861d Aurelien Jarno
    return 0;
6541 67b7861d Aurelien Jarno
}
6542 67b7861d Aurelien Jarno
6543 1d6bda35 bellard
/* misc functions */
6544 c4850f9e Peter Maydell
/* Converts the unsigned 32-bit integer `a' to single precision by widening
 * it to 64 bits and delegating to int64_to_float32(). */
float32 uint32_to_float32(uint32_t a STATUS_PARAM)
{
    int64_t widened = a;

    return int64_to_float32(widened STATUS_VAR);
}
6548 1d6bda35 bellard
6549 c4850f9e Peter Maydell
/* Converts the unsigned 32-bit integer `a' to double precision by widening
 * it to 64 bits and delegating to int64_to_float64(). */
float64 uint32_to_float64(uint32_t a STATUS_PARAM)
{
    int64_t widened = a;

    return int64_to_float64(widened STATUS_VAR);
}
6553 1d6bda35 bellard
6554 9f8d2a09 Andreas Färber
/* Converts the single-precision value `a' to an unsigned 32-bit integer via
 * float32_to_int64().  An intermediate result outside [0, 0xffffffff] is
 * clamped to the nearer bound; in that case the exception flags are reset to
 * their value on entry and only the invalid exception is raised on top. */
uint32 float32_to_uint32(float32 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float32_to_int64(a STATUS_VAR);
    uint32 clamped;

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffffffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6572 1d6bda35 bellard
6573 9f8d2a09 Andreas Färber
/* Converts the single-precision value `a' to an unsigned 32-bit integer via
 * float32_to_int64_round_to_zero().  An intermediate result outside
 * [0, 0xffffffff] is clamped to the nearer bound; in that case the exception
 * flags are reset to their value on entry and only the invalid exception is
 * raised on top. */
uint32 float32_to_uint32_round_to_zero(float32 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);
    uint32 clamped;

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffffffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6591 1d6bda35 bellard
6592 f581bf54 Will Newton
/* Converts the single-precision value `a' to a signed 16-bit integer via
 * float32_to_int32().  An intermediate result outside [-0x8000, 0x7fff] is
 * clamped to the nearer bound; in that case the exception flags are reset to
 * their value on entry and only the invalid exception is raised on top. */
int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int32_t wide = float32_to_int32(a STATUS_VAR);
    int_fast16_t clamped;

    if (wide >= -0x8000 && wide <= 0x7fff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < -0x8000) ? -0x8000 : 0x7fff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6611 f581bf54 Will Newton
6612 f581bf54 Will Newton
/* Converts the single-precision value `a' to an unsigned 16-bit integer via
 * float32_to_int32().  An intermediate result outside [0, 0xffff] is clamped
 * to the nearer bound; in that case the exception flags are reset to their
 * value on entry and only the invalid exception is raised on top. */
uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int32_t wide = float32_to_int32(a STATUS_VAR);
    uint_fast16_t clamped;

    if (wide >= 0 && wide <= 0xffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6631 f581bf54 Will Newton
6632 5aea4c58 Andreas Färber
/* Converts the single-precision value `a' to an unsigned 16-bit integer via
 * float32_to_int64_round_to_zero().  An intermediate result outside
 * [0, 0xffff] is clamped to the nearer bound; in that case the exception
 * flags are reset to their value on entry and only the invalid exception is
 * raised on top. */
uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);
    uint_fast16_t clamped;

    if (wide >= 0 && wide <= 0xffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6650 cbcef455 Peter Maydell
6651 9f8d2a09 Andreas Färber
/* Converts the double-precision value `a' to an unsigned 32-bit integer via
 * float64_to_uint64().  An intermediate result above 0xffffffff is clamped
 * to 0xffffffff; in that case the exception flags are reset to their value
 * on entry and only the invalid exception is raised on top. */
uint32 float64_to_uint32(float64 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const uint64_t wide = float64_to_uint64(a STATUS_VAR);

    if (wide <= 0xffffffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return 0xffffffff;
}
6667 1d6bda35 bellard
6668 9f8d2a09 Andreas Färber
/* Converts the double-precision value `a' to an unsigned 32-bit integer via
 * float64_to_uint64_round_to_zero().  An intermediate result above
 * 0xffffffff is clamped to 0xffffffff; in that case the exception flags are
 * reset to their value on entry and only the invalid exception is raised on
 * top. */
uint32 float64_to_uint32_round_to_zero(float64 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const uint64_t wide = float64_to_uint64_round_to_zero(a STATUS_VAR);

    if (wide <= 0xffffffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return 0xffffffff;
}
6684 1d6bda35 bellard
6685 f581bf54 Will Newton
/* Converts the double-precision value `a' to a signed 16-bit integer via
 * float64_to_int32().  An intermediate result outside [-0x8000, 0x7fff] is
 * clamped to the nearer bound; in that case the exception flags are reset to
 * their value on entry and only the invalid exception is raised on top. */
int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float64_to_int32(a STATUS_VAR);
    int_fast16_t clamped;

    if (wide >= -0x8000 && wide <= 0x7fff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < -0x8000) ? -0x8000 : 0x7fff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6704 f581bf54 Will Newton
6705 f581bf54 Will Newton
/* Converts the double-precision value `a' to an unsigned 16-bit integer via
 * float64_to_int32().  An intermediate result outside [0, 0xffff] is clamped
 * to the nearer bound; in that case the exception flags are reset to their
 * value on entry and only the invalid exception is raised on top. */
uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float64_to_int32(a STATUS_VAR);
    uint_fast16_t clamped;

    if (wide >= 0 && wide <= 0xffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6724 f581bf54 Will Newton
6725 5aea4c58 Andreas Färber
/* Converts the double-precision value `a' to an unsigned 16-bit integer via
 * float64_to_int64_round_to_zero().  An intermediate result outside
 * [0, 0xffff] is clamped to the nearer bound; in that case the exception
 * flags are reset to their value on entry and only the invalid exception is
 * raised on top. */
uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
{
    const int saved_flags = get_float_exception_flags(status);
    const int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);
    uint_fast16_t clamped;

    if (wide >= 0 && wide <= 0xffff) {
        /* In range: keep whatever flags the inner conversion raised. */
        return wide;
    }
    clamped = (wide < 0) ? 0 : 0xffff;
    set_float_exception_flags(saved_flags, status);
    float_raise(float_flag_invalid STATUS_VAR);
    return clamped;
}
6743 cbcef455 Peter Maydell
6744 fb3ea83a Tom Musta
/*----------------------------------------------------------------------------
6745 fb3ea83a Tom Musta
| Returns the result of converting the double-precision floating-point value
6746 fb3ea83a Tom Musta
| `a' to the 64-bit unsigned integer format.  The conversion is
6747 fb3ea83a Tom Musta
| performed according to the IEC/IEEE Standard for Binary Floating-Point
6748 fb3ea83a Tom Musta
| Arithmetic---which means in particular that the conversion is rounded
6749 fb3ea83a Tom Musta
| according to the current rounding mode.  If `a' is a NaN, the largest
6750 fb3ea83a Tom Musta
| positive integer is returned.  If the conversion overflows, the
6751 fb3ea83a Tom Musta
| largest unsigned integer is returned.  If 'a' is negative, the value is
6752 fb3ea83a Tom Musta
| rounded and zero is returned; negative values that do not round to zero
6753 fb3ea83a Tom Musta
| will raise the inexact exception.
6754 fb3ea83a Tom Musta
*----------------------------------------------------------------------------*/
6755 75d62a58 j_mayer
6756 fb3ea83a Tom Musta
uint64_t float64_to_uint64(float64 a STATUS_PARAM)
6757 fb3ea83a Tom Musta
{
6758 fb3ea83a Tom Musta
    flag aSign;
6759 fb3ea83a Tom Musta
    int_fast16_t aExp, shiftCount;
6760 fb3ea83a Tom Musta
    uint64_t aSig, aSigExtra;
6761 fb3ea83a Tom Musta
    a = float64_squash_input_denormal(a STATUS_VAR);
6762 75d62a58 j_mayer
6763 fb3ea83a Tom Musta
    aSig = extractFloat64Frac(a);
6764 fb3ea83a Tom Musta
    aExp = extractFloat64Exp(a);
6765 fb3ea83a Tom Musta
    aSign = extractFloat64Sign(a);
6766 fb3ea83a Tom Musta
    if (aSign && (aExp > 1022)) {
6767 fb3ea83a Tom Musta
        float_raise(float_flag_invalid STATUS_VAR);
6768 fb3ea83a Tom Musta
        if (float64_is_any_nan(a)) {
6769 fb3ea83a Tom Musta
            return LIT64(0xFFFFFFFFFFFFFFFF);
6770 fb3ea83a Tom Musta
        } else {
6771 fb3ea83a Tom Musta
            return 0;
6772 fb3ea83a Tom Musta
        }
6773 fb3ea83a Tom Musta
    }
6774 fb3ea83a Tom Musta
    if (aExp) {
6775 fb3ea83a Tom Musta
        aSig |= LIT64(0x0010000000000000);
6776 fb3ea83a Tom Musta
    }
6777 fb3ea83a Tom Musta
    shiftCount = 0x433 - aExp;
6778 fb3ea83a Tom Musta
    if (shiftCount <= 0) {
6779 fb3ea83a Tom Musta
        if (0x43E < aExp) {
6780 fb3ea83a Tom Musta
            float_raise(float_flag_invalid STATUS_VAR);
6781 fb3ea83a Tom Musta
            return LIT64(0xFFFFFFFFFFFFFFFF);
6782 fb3ea83a Tom Musta
        }
6783 fb3ea83a Tom Musta
        aSigExtra = 0;
6784 fb3ea83a Tom Musta
        aSig <<= -shiftCount;
6785 fb3ea83a Tom Musta
    } else {
6786 fb3ea83a Tom Musta
        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
6787 fb3ea83a Tom Musta
    }
6788 fb3ea83a Tom Musta
    return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
6789 75d62a58 j_mayer
}
6790 75d62a58 j_mayer
6791 75d62a58 j_mayer
/* Converts the double-precision value `a' to an unsigned 64-bit integer with
 * the rounding mode temporarily forced to float_round_to_zero.  The rounding
 * mode that was in effect on entry is put back before returning.
 *
 * The intermediate is kept in a uint64_t: holding it in a signed 64-bit
 * variable would push results above INT64_MAX through an
 * implementation-defined unsigned-to-signed conversion. */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    signed char current_rounding_mode = STATUS(float_rounding_mode);
    uint64_t v;

    set_float_rounding_mode(float_round_to_zero STATUS_VAR);
    v = float64_to_uint64(a STATUS_VAR);
    set_float_rounding_mode(current_rounding_mode STATUS_VAR);
    return v;
}
6799 75d62a58 j_mayer
6800 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
6801 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
6802 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
6803 1d6bda35 bellard
{                                                                            \
6804 1d6bda35 bellard
    flag aSign, bSign;                                                       \
6805 bb98fe42 Andreas Färber
    uint ## s ## _t av, bv;                                                  \
6806 37d18660 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
6807 37d18660 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
6808 1d6bda35 bellard
                                                                             \
6809 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
6810 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
6811 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
6812 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
6813 1d6bda35 bellard
        if (!is_quiet ||                                                     \
6814 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
6815 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
6816 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
6817 1d6bda35 bellard
        }                                                                    \
6818 1d6bda35 bellard
        return float_relation_unordered;                                     \
6819 1d6bda35 bellard
    }                                                                        \
6820 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
6821 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
6822 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
6823 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
6824 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
6825 bb98fe42 Andreas Färber
        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
6826 1d6bda35 bellard
            /* zero case */                                                  \
6827 1d6bda35 bellard
            return float_relation_equal;                                     \
6828 1d6bda35 bellard
        } else {                                                             \
6829 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
6830 1d6bda35 bellard
        }                                                                    \
6831 1d6bda35 bellard
    } else {                                                                 \
6832 f090c9d4 pbrook
        if (av == bv) {                                                      \
6833 1d6bda35 bellard
            return float_relation_equal;                                     \
6834 1d6bda35 bellard
        } else {                                                             \
6835 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
6836 1d6bda35 bellard
        }                                                                    \
6837 1d6bda35 bellard
    }                                                                        \
6838 1d6bda35 bellard
}                                                                            \
6839 1d6bda35 bellard
                                                                             \
6840 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
6841 1d6bda35 bellard
{                                                                            \
6842 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
6843 1d6bda35 bellard
}                                                                            \
6844 1d6bda35 bellard
                                                                             \
6845 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
6846 1d6bda35 bellard
{                                                                            \
6847 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
6848 1d6bda35 bellard
}
6849 1d6bda35 bellard
6850 1d6bda35 bellard
/* Instantiate the comparison functions for the 32- and 64-bit formats:
 * float32_compare(), float32_compare_quiet(), float64_compare() and
 * float64_compare_quiet().
 * NOTE(review): the second argument looks like the all-ones exponent value
 * of each format; the head of the COMPARE macro is outside this view, so
 * confirm how it is used. */
COMPARE(32, 0xff)
6851 1d6bda35 bellard
COMPARE(64, 0x7ff)
6852 9ee6e8bb pbrook
6853 f6714d36 Aurelien Jarno
INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6854 f6714d36 Aurelien Jarno
                                      int is_quiet STATUS_PARAM )
6855 f6714d36 Aurelien Jarno
{
6856 f6714d36 Aurelien Jarno
    flag aSign, bSign;
6857 f6714d36 Aurelien Jarno
6858 f6714d36 Aurelien Jarno
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6859 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( a )<<1 ) ) ||
6860 f6714d36 Aurelien Jarno
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6861 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( b )<<1 ) )) {
6862 f6714d36 Aurelien Jarno
        if (!is_quiet ||
6863 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( a ) ||
6864 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( b ) ) {
6865 f6714d36 Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6866 f6714d36 Aurelien Jarno
        }
6867 f6714d36 Aurelien Jarno
        return float_relation_unordered;
6868 f6714d36 Aurelien Jarno
    }
6869 f6714d36 Aurelien Jarno
    aSign = extractFloatx80Sign( a );
6870 f6714d36 Aurelien Jarno
    bSign = extractFloatx80Sign( b );
6871 f6714d36 Aurelien Jarno
    if ( aSign != bSign ) {
6872 f6714d36 Aurelien Jarno
6873 f6714d36 Aurelien Jarno
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6874 f6714d36 Aurelien Jarno
             ( ( a.low | b.low ) == 0 ) ) {
6875 f6714d36 Aurelien Jarno
            /* zero case */
6876 f6714d36 Aurelien Jarno
            return float_relation_equal;
6877 f6714d36 Aurelien Jarno
        } else {
6878 f6714d36 Aurelien Jarno
            return 1 - (2 * aSign);
6879 f6714d36 Aurelien Jarno
        }
6880 f6714d36 Aurelien Jarno
    } else {
6881 f6714d36 Aurelien Jarno
        if (a.low == b.low && a.high == b.high) {
6882 f6714d36 Aurelien Jarno
            return float_relation_equal;
6883 f6714d36 Aurelien Jarno
        } else {
6884 f6714d36 Aurelien Jarno
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6885 f6714d36 Aurelien Jarno
        }
6886 f6714d36 Aurelien Jarno
    }
6887 f6714d36 Aurelien Jarno
}
6888 f6714d36 Aurelien Jarno
6889 f6714d36 Aurelien Jarno
/*----------------------------------------------------------------------------
| Compares the extended double-precision values `a' and `b' with
| `is_quiet' cleared, so floatx80_compare_internal() raises the invalid
| exception for any NaN operand.  Returns that helper's relation code.
*----------------------------------------------------------------------------*/
int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
{
    const int is_quiet = 0;

    return floatx80_compare_internal( a, b, is_quiet STATUS_VAR );
}
6893 f6714d36 Aurelien Jarno
6894 f6714d36 Aurelien Jarno
/*----------------------------------------------------------------------------
| Compares the extended double-precision values `a' and `b' with
| `is_quiet' set, so floatx80_compare_internal() raises the invalid
| exception only when an operand is a signaling NaN.  Returns that helper's
| relation code.
*----------------------------------------------------------------------------*/
int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
{
    const int is_quiet = 1;

    return floatx80_compare_internal( a, b, is_quiet STATUS_VAR );
}
6898 f6714d36 Aurelien Jarno
6899 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
6900 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
6901 1f587329 blueswir1
{
6902 1f587329 blueswir1
    flag aSign, bSign;
6903 1f587329 blueswir1
6904 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6905 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6906 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6907 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6908 1f587329 blueswir1
        if (!is_quiet ||
6909 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
6910 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
6911 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
6912 1f587329 blueswir1
        }
6913 1f587329 blueswir1
        return float_relation_unordered;
6914 1f587329 blueswir1
    }
6915 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
6916 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
6917 1f587329 blueswir1
    if ( aSign != bSign ) {
6918 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6919 1f587329 blueswir1
            /* zero case */
6920 1f587329 blueswir1
            return float_relation_equal;
6921 1f587329 blueswir1
        } else {
6922 1f587329 blueswir1
            return 1 - (2 * aSign);
6923 1f587329 blueswir1
        }
6924 1f587329 blueswir1
    } else {
6925 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
6926 1f587329 blueswir1
            return float_relation_equal;
6927 1f587329 blueswir1
        } else {
6928 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6929 1f587329 blueswir1
        }
6930 1f587329 blueswir1
    }
6931 1f587329 blueswir1
}
6932 1f587329 blueswir1
6933 1f587329 blueswir1
/*----------------------------------------------------------------------------
| Compares the quadruple-precision values `a' and `b' with `is_quiet'
| cleared, so float128_compare_internal() raises the invalid exception for
| any NaN operand.  Returns that helper's relation code.
*----------------------------------------------------------------------------*/
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 0;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
6937 1f587329 blueswir1
6938 1f587329 blueswir1
/*----------------------------------------------------------------------------
| Compares the quadruple-precision values `a' and `b' with `is_quiet' set,
| so float128_compare_internal() raises the invalid exception only when an
| operand is a signaling NaN.  Returns that helper's relation code.
*----------------------------------------------------------------------------*/
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 1;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
6942 1f587329 blueswir1
6943 274f1b04 Peter Maydell
/* min() and max() functions. These can't be implemented as
6944 274f1b04 Peter Maydell
 * 'compare and pick one input' because that would mishandle
6945 274f1b04 Peter Maydell
 * NaNs and +0 vs -0.
6946 e17ab310 Will Newton
 *
6947 e17ab310 Will Newton
 * minnum() and maxnum() functions. These are similar to the min()
6948 e17ab310 Will Newton
 * and max() functions but if one of the arguments is a QNaN and
6949 e17ab310 Will Newton
 * the other is numerical then the numerical argument is returned.
6950 e17ab310 Will Newton
 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
6951 e17ab310 Will Newton
 * and maxNum() operations. min() and max() are the typical min/max
6952 e17ab310 Will Newton
 * semantics provided by many CPUs which predate that specification.
6953 274f1b04 Peter Maydell
 */
6954 e70614ea Will Newton
#define MINMAX(s)                                                       \
6955 274f1b04 Peter Maydell
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
6956 e17ab310 Will Newton
                                        int ismin, int isieee STATUS_PARAM) \
6957 274f1b04 Peter Maydell
{                                                                       \
6958 274f1b04 Peter Maydell
    flag aSign, bSign;                                                  \
6959 274f1b04 Peter Maydell
    uint ## s ## _t av, bv;                                             \
6960 274f1b04 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
6961 274f1b04 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
6962 274f1b04 Peter Maydell
    if (float ## s ## _is_any_nan(a) ||                                 \
6963 274f1b04 Peter Maydell
        float ## s ## _is_any_nan(b)) {                                 \
6964 e17ab310 Will Newton
        if (isieee) {                                                   \
6965 e17ab310 Will Newton
            if (float ## s ## _is_quiet_nan(a) &&                       \
6966 e17ab310 Will Newton
                !float ## s ##_is_any_nan(b)) {                         \
6967 e17ab310 Will Newton
                return b;                                               \
6968 e17ab310 Will Newton
            } else if (float ## s ## _is_quiet_nan(b) &&                \
6969 e17ab310 Will Newton
                       !float ## s ## _is_any_nan(a)) {                 \
6970 e17ab310 Will Newton
                return a;                                               \
6971 e17ab310 Will Newton
            }                                                           \
6972 e17ab310 Will Newton
        }                                                               \
6973 274f1b04 Peter Maydell
        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
6974 274f1b04 Peter Maydell
    }                                                                   \
6975 274f1b04 Peter Maydell
    aSign = extractFloat ## s ## Sign(a);                               \
6976 274f1b04 Peter Maydell
    bSign = extractFloat ## s ## Sign(b);                               \
6977 274f1b04 Peter Maydell
    av = float ## s ## _val(a);                                         \
6978 274f1b04 Peter Maydell
    bv = float ## s ## _val(b);                                         \
6979 274f1b04 Peter Maydell
    if (aSign != bSign) {                                               \
6980 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6981 274f1b04 Peter Maydell
            return aSign ? a : b;                                       \
6982 274f1b04 Peter Maydell
        } else {                                                        \
6983 274f1b04 Peter Maydell
            return aSign ? b : a;                                       \
6984 274f1b04 Peter Maydell
        }                                                               \
6985 274f1b04 Peter Maydell
    } else {                                                            \
6986 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6987 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? a : b;                         \
6988 274f1b04 Peter Maydell
        } else {                                                        \
6989 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? b : a;                         \
6990 274f1b04 Peter Maydell
        }                                                               \
6991 274f1b04 Peter Maydell
    }                                                                   \
6992 274f1b04 Peter Maydell
}                                                                       \
6993 274f1b04 Peter Maydell
                                                                        \
6994 274f1b04 Peter Maydell
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
6995 274f1b04 Peter Maydell
{                                                                       \
6996 e17ab310 Will Newton
    return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR);                \
6997 274f1b04 Peter Maydell
}                                                                       \
6998 274f1b04 Peter Maydell
                                                                        \
6999 274f1b04 Peter Maydell
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
7000 274f1b04 Peter Maydell
{                                                                       \
7001 e17ab310 Will Newton
    return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR);                \
7002 e17ab310 Will Newton
}                                                                       \
7003 e17ab310 Will Newton
                                                                        \
7004 e17ab310 Will Newton
float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
7005 e17ab310 Will Newton
{                                                                       \
7006 e17ab310 Will Newton
    return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR);                \
7007 e17ab310 Will Newton
}                                                                       \
7008 e17ab310 Will Newton
                                                                        \
7009 e17ab310 Will Newton
float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
7010 e17ab310 Will Newton
{                                                                       \
7011 e17ab310 Will Newton
    return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR);                \
7012 274f1b04 Peter Maydell
}
7013 274f1b04 Peter Maydell
7014 e70614ea Will Newton
MINMAX(32)
7015 e70614ea Will Newton
MINMAX(64)
7016 274f1b04 Peter Maydell
7017 274f1b04 Peter Maydell
7018 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
7019 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
7020 9ee6e8bb pbrook
{
7021 9ee6e8bb pbrook
    flag aSign;
7022 326b9e98 Aurelien Jarno
    int16_t aExp;
7023 bb98fe42 Andreas Färber
    uint32_t aSig;
7024 9ee6e8bb pbrook
7025 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
7026 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
7027 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
7028 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
7029 9ee6e8bb pbrook
7030 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
7031 326b9e98 Aurelien Jarno
        if ( aSig ) {
7032 326b9e98 Aurelien Jarno
            return propagateFloat32NaN( a, a STATUS_VAR );
7033 326b9e98 Aurelien Jarno
        }
7034 9ee6e8bb pbrook
        return a;
7035 9ee6e8bb pbrook
    }
7036 3c85c37f Peter Maydell
    if (aExp != 0) {
7037 69397542 pbrook
        aSig |= 0x00800000;
7038 3c85c37f Peter Maydell
    } else if (aSig == 0) {
7039 69397542 pbrook
        return a;
7040 3c85c37f Peter Maydell
    } else {
7041 3c85c37f Peter Maydell
        aExp++;
7042 3c85c37f Peter Maydell
    }
7043 69397542 pbrook
7044 326b9e98 Aurelien Jarno
    if (n > 0x200) {
7045 326b9e98 Aurelien Jarno
        n = 0x200;
7046 326b9e98 Aurelien Jarno
    } else if (n < -0x200) {
7047 326b9e98 Aurelien Jarno
        n = -0x200;
7048 326b9e98 Aurelien Jarno
    }
7049 326b9e98 Aurelien Jarno
7050 69397542 pbrook
    aExp += n - 1;
7051 69397542 pbrook
    aSig <<= 7;
7052 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
7053 9ee6e8bb pbrook
}
7054 9ee6e8bb pbrook
7055 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (double-precision variant).
 *
 * The input is first passed through float64_squash_input_denormal().  NaNs
 * go through propagateFloat64NaN(); infinities and zeros are returned
 * unchanged.  N is clamped to [-0x1000, 0x1000] before it is added to the
 * exponent, and the result is produced by normalizeRoundAndPackFloat64().
 */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag negative;
    int16_t exponent;
    uint64_t fraction;

    a = float64_squash_input_denormal( a STATUS_VAR );
    negative = extractFloat64Sign( a );
    exponent = extractFloat64Exp( a );
    fraction = extractFloat64Frac( a );

    /* All-ones exponent: NaN if the fraction is non-zero, else infinity. */
    if ( exponent == 0x7FF ) {
        if ( fraction != 0 ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( exponent == 0 ) {
        if ( fraction == 0 ) {
            /* zero scales to itself */
            return a;
        }
        /* subnormal: no leading one is added; the exponent becomes 1 */
        exponent++;
    } else {
        /* normal number: set bit 52, the leading significand bit that is
           not stored in the fraction field */
        fraction |= LIT64( 0x0010000000000000 );
    }

    /* Clamp n so the sum below stays inside the int16_t exponent variable.
       Presumably anything this far out of range overflows or underflows in
       the final rounding step anyway. */
    n = ( n > 0x1000 ) ? 0x1000 : ( ( n < -0x1000 ) ? -0x1000 : n );

    /* NOTE(review): the "- 1" and the shift by 10 follow the input
       convention of normalizeRoundAndPackFloat64(), which is defined
       outside this view. */
    exponent += n - 1;
    fraction <<= 10;
    return normalizeRoundAndPackFloat64( negative, exponent, fraction STATUS_VAR );
}
7090 9ee6e8bb pbrook
7091 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (extended double-precision
 * variant).
 *
 * NaNs go through propagateFloatx80NaN(); infinities and zeros are
 * returned unchanged.  N is clamped to [-0x10000, 0x10000] before it is
 * added to the exponent.  The significand is passed on exactly as
 * extracted, and the result is rounded by normalizeRoundAndPackFloatx80()
 * using the rounding precision currently held in the status.
 */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag negative;
    int32_t exponent;
    uint64_t significand;

    negative = extractFloatx80Sign( a );
    exponent = extractFloatx80Exp( a );
    significand = extractFloatx80Frac( a );

    /* All-ones exponent: NaN if any bit below the top significand bit is
       set, otherwise infinity. */
    if ( exponent == 0x7FFF ) {
        if ( ( significand << 1 ) != 0 ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( exponent == 0 ) {
        if ( significand == 0 ) {
            /* zero scales to itself */
            return a;
        }
        /* zero exponent field with a non-zero significand: the exponent
           becomes 1 */
        exponent++;
    }

    /* Clamp n to bound the exponent sum below.  Presumably anything this
       far out of range overflows or underflows in the final rounding step
       anyway. */
    n = ( n > 0x10000 ) ? 0x10000 : ( ( n < -0x10000 ) ? -0x10000 : n );

    exponent += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          negative, exponent, significand, 0
                                          STATUS_VAR );
}
7125 9ee6e8bb pbrook
7126 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (quadruple-precision variant).
 *
 * NaNs go through propagateFloat128NaN(); infinities and zeros are
 * returned unchanged.  N is clamped to [-0x10000, 0x10000] before it is
 * added to the exponent, and the result is produced by
 * normalizeRoundAndPackFloat128().
 */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag negative;
    int32_t exponent;
    uint64_t fracHigh, fracLow;

    negative = extractFloat128Sign( a );
    exponent = extractFloat128Exp( a );
    fracHigh = extractFloat128Frac0( a );
    fracLow = extractFloat128Frac1( a );

    /* All-ones exponent: NaN if the fraction is non-zero, else infinity. */
    if ( exponent == 0x7FFF ) {
        if ( ( fracHigh | fracLow ) != 0 ) {
            return propagateFloat128NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( exponent == 0 ) {
        if ( ( fracHigh == 0 ) && ( fracLow == 0 ) ) {
            /* zero scales to itself */
            return a;
        }
        /* subnormal: no leading one is added; the exponent becomes 1 */
        exponent++;
    } else {
        /* normal number: set bit 48 of the high word, the leading
           significand bit that is not stored in the fraction field */
        fracHigh |= LIT64( 0x0001000000000000 );
    }

    /* Clamp n to bound the exponent sum below.  Presumably anything this
       far out of range overflows or underflows in the final rounding step
       anyway. */
    n = ( n > 0x10000 ) ? 0x10000 : ( ( n < -0x10000 ) ? -0x10000 : n );

    /* NOTE(review): the "- 1" follows the input convention of
       normalizeRoundAndPackFloat128(), which is defined outside this
       view. */
    exponent += n - 1;
    return normalizeRoundAndPackFloat128( negative, exponent, fracHigh, fracLow
                                          STATUS_VAR );
}