Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ a74cdab4

History | View | Annotate | Download (228.1 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10 158142c2 bellard
Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 158142c2 bellard
#include "softfloat.h"
39 158142c2 bellard
40 158142c2 bellard
/*----------------------------------------------------------------------------
41 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
42 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
43 158142c2 bellard
| desired.)
44 158142c2 bellard
*----------------------------------------------------------------------------*/
45 158142c2 bellard
#include "softfloat-macros.h"
46 158142c2 bellard
47 158142c2 bellard
/*----------------------------------------------------------------------------
48 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
49 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
50 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
51 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
53 158142c2 bellard
| specific.
54 158142c2 bellard
*----------------------------------------------------------------------------*/
55 158142c2 bellard
#include "softfloat-specialize.h"
56 158142c2 bellard
57 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `float_rounding_mode' field of the floating-point
| status.
*----------------------------------------------------------------------------*/

void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
61 158142c2 bellard
62 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Overwrites the `float_exception_flags' field of the floating-point status
| with `val'.  The previous flags are replaced, not accumulated.
*----------------------------------------------------------------------------*/

void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
66 1d6bda35 bellard
67 158142c2 bellard
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Stores `val' in the `floatx80_rounding_precision' field of the
| floating-point status.  Only compiled when FLOATX80 is defined.
*----------------------------------------------------------------------------*/

void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
#endif
73 158142c2 bellard
74 158142c2 bellard
/*----------------------------------------------------------------------------
75 bb4d4bb3 Peter Maydell
| Returns the fraction bits of the half-precision floating-point value `a'.
76 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
77 bb4d4bb3 Peter Maydell
78 bb4d4bb3 Peter Maydell
INLINE uint32_t extractFloat16Frac(float16 a)
79 bb4d4bb3 Peter Maydell
{
80 bb4d4bb3 Peter Maydell
    return float16_val(a) & 0x3ff;
81 bb4d4bb3 Peter Maydell
}
82 bb4d4bb3 Peter Maydell
83 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
84 bb4d4bb3 Peter Maydell
| Returns the exponent bits of the half-precision floating-point value `a'.
85 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
86 bb4d4bb3 Peter Maydell
87 bb4d4bb3 Peter Maydell
INLINE int16 extractFloat16Exp(float16 a)
88 bb4d4bb3 Peter Maydell
{
89 bb4d4bb3 Peter Maydell
    return (float16_val(a) >> 10) & 0x1f;
90 bb4d4bb3 Peter Maydell
}
91 bb4d4bb3 Peter Maydell
92 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
93 bb4d4bb3 Peter Maydell
| Returns the sign bit of the single-precision floating-point value `a'.
94 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
95 bb4d4bb3 Peter Maydell
96 bb4d4bb3 Peter Maydell
INLINE flag extractFloat16Sign(float16 a)
97 bb4d4bb3 Peter Maydell
{
98 bb4d4bb3 Peter Maydell
    return float16_val(a)>>15;
99 bb4d4bb3 Peter Maydell
}
100 bb4d4bb3 Peter Maydell
101 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
102 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
103 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
104 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
105 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
106 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
107 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
108 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
109 158142c2 bellard
| positive or negative integer is returned.
110 158142c2 bellard
*----------------------------------------------------------------------------*/
111 158142c2 bellard
112 bb98fe42 Andreas Färber
static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
113 158142c2 bellard
{
114 158142c2 bellard
    int8 roundingMode;
115 158142c2 bellard
    flag roundNearestEven;
116 158142c2 bellard
    int8 roundIncrement, roundBits;
117 158142c2 bellard
    int32 z;
118 158142c2 bellard
119 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
120 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
121 158142c2 bellard
    roundIncrement = 0x40;
122 158142c2 bellard
    if ( ! roundNearestEven ) {
123 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
124 158142c2 bellard
            roundIncrement = 0;
125 158142c2 bellard
        }
126 158142c2 bellard
        else {
127 158142c2 bellard
            roundIncrement = 0x7F;
128 158142c2 bellard
            if ( zSign ) {
129 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
130 158142c2 bellard
            }
131 158142c2 bellard
            else {
132 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
133 158142c2 bellard
            }
134 158142c2 bellard
        }
135 158142c2 bellard
    }
136 158142c2 bellard
    roundBits = absZ & 0x7F;
137 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
138 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
139 158142c2 bellard
    z = absZ;
140 158142c2 bellard
    if ( zSign ) z = - z;
141 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
142 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
143 bb98fe42 Andreas Färber
        return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
144 158142c2 bellard
    }
145 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
146 158142c2 bellard
    return z;
147 158142c2 bellard
148 158142c2 bellard
}
149 158142c2 bellard
150 158142c2 bellard
/*----------------------------------------------------------------------------
151 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
152 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
153 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
154 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
155 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
156 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
157 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
158 158142c2 bellard
| exception is raised and the largest positive or negative integer is
159 158142c2 bellard
| returned.
160 158142c2 bellard
*----------------------------------------------------------------------------*/
161 158142c2 bellard
162 bb98fe42 Andreas Färber
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
163 158142c2 bellard
{
164 158142c2 bellard
    int8 roundingMode;
165 158142c2 bellard
    flag roundNearestEven, increment;
166 158142c2 bellard
    int64 z;
167 158142c2 bellard
168 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
169 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
170 bb98fe42 Andreas Färber
    increment = ( (int64_t) absZ1 < 0 );
171 158142c2 bellard
    if ( ! roundNearestEven ) {
172 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
173 158142c2 bellard
            increment = 0;
174 158142c2 bellard
        }
175 158142c2 bellard
        else {
176 158142c2 bellard
            if ( zSign ) {
177 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
178 158142c2 bellard
            }
179 158142c2 bellard
            else {
180 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
181 158142c2 bellard
            }
182 158142c2 bellard
        }
183 158142c2 bellard
    }
184 158142c2 bellard
    if ( increment ) {
185 158142c2 bellard
        ++absZ0;
186 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
187 bb98fe42 Andreas Färber
        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
188 158142c2 bellard
    }
189 158142c2 bellard
    z = absZ0;
190 158142c2 bellard
    if ( zSign ) z = - z;
191 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
192 158142c2 bellard
 overflow:
193 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
194 158142c2 bellard
        return
195 bb98fe42 Andreas Färber
              zSign ? (int64_t) LIT64( 0x8000000000000000 )
196 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
197 158142c2 bellard
    }
198 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
199 158142c2 bellard
    return z;
200 158142c2 bellard
201 158142c2 bellard
}
202 158142c2 bellard
203 158142c2 bellard
/*----------------------------------------------------------------------------
204 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
205 158142c2 bellard
*----------------------------------------------------------------------------*/
206 158142c2 bellard
207 bb98fe42 Andreas Färber
INLINE uint32_t extractFloat32Frac( float32 a )
208 158142c2 bellard
{
209 158142c2 bellard
210 f090c9d4 pbrook
    return float32_val(a) & 0x007FFFFF;
211 158142c2 bellard
212 158142c2 bellard
}
213 158142c2 bellard
214 158142c2 bellard
/*----------------------------------------------------------------------------
215 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
216 158142c2 bellard
*----------------------------------------------------------------------------*/
217 158142c2 bellard
218 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
219 158142c2 bellard
{
220 158142c2 bellard
221 f090c9d4 pbrook
    return ( float32_val(a)>>23 ) & 0xFF;
222 158142c2 bellard
223 158142c2 bellard
}
224 158142c2 bellard
225 158142c2 bellard
/*----------------------------------------------------------------------------
226 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
227 158142c2 bellard
*----------------------------------------------------------------------------*/
228 158142c2 bellard
229 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
230 158142c2 bellard
{
231 158142c2 bellard
232 f090c9d4 pbrook
    return float32_val(a)>>31;
233 158142c2 bellard
234 158142c2 bellard
}
235 158142c2 bellard
236 158142c2 bellard
/*----------------------------------------------------------------------------
237 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
238 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
239 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
240 37d18660 Peter Maydell
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
241 37d18660 Peter Maydell
{
242 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
243 37d18660 Peter Maydell
        if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
244 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
245 37d18660 Peter Maydell
            return make_float32(float32_val(a) & 0x80000000);
246 37d18660 Peter Maydell
        }
247 37d18660 Peter Maydell
    }
248 37d18660 Peter Maydell
    return a;
249 37d18660 Peter Maydell
}
250 37d18660 Peter Maydell
251 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
252 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
253 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
254 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
255 158142c2 bellard
| `zSigPtr', respectively.
256 158142c2 bellard
*----------------------------------------------------------------------------*/
257 158142c2 bellard
258 158142c2 bellard
static void
259 bb98fe42 Andreas Färber
 normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
260 158142c2 bellard
{
261 158142c2 bellard
    int8 shiftCount;
262 158142c2 bellard
263 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
264 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
265 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
266 158142c2 bellard
267 158142c2 bellard
}
268 158142c2 bellard
269 158142c2 bellard
/*----------------------------------------------------------------------------
270 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
271 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
272 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
273 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
274 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
275 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
276 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
277 158142c2 bellard
| significand.
278 158142c2 bellard
*----------------------------------------------------------------------------*/
279 158142c2 bellard
280 bb98fe42 Andreas Färber
INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
281 158142c2 bellard
{
282 158142c2 bellard
283 f090c9d4 pbrook
    return make_float32(
284 bb98fe42 Andreas Färber
          ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
285 158142c2 bellard
286 158142c2 bellard
}
287 158142c2 bellard
288 158142c2 bellard
/*----------------------------------------------------------------------------
289 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
290 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
291 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
292 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
293 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
294 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
295 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
296 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
297 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
298 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
299 158142c2 bellard
| precision floating-point number.
300 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
301 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
302 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
303 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
304 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
305 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
306 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
307 158142c2 bellard
| Binary Floating-Point Arithmetic.
308 158142c2 bellard
*----------------------------------------------------------------------------*/
309 158142c2 bellard
310 bb98fe42 Andreas Färber
static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
311 158142c2 bellard
{
312 158142c2 bellard
    int8 roundingMode;
313 158142c2 bellard
    flag roundNearestEven;
314 158142c2 bellard
    int8 roundIncrement, roundBits;
315 158142c2 bellard
    flag isTiny;
316 158142c2 bellard
317 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
318 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
319 158142c2 bellard
    roundIncrement = 0x40;
320 158142c2 bellard
    if ( ! roundNearestEven ) {
321 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
322 158142c2 bellard
            roundIncrement = 0;
323 158142c2 bellard
        }
324 158142c2 bellard
        else {
325 158142c2 bellard
            roundIncrement = 0x7F;
326 158142c2 bellard
            if ( zSign ) {
327 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
328 158142c2 bellard
            }
329 158142c2 bellard
            else {
330 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
331 158142c2 bellard
            }
332 158142c2 bellard
        }
333 158142c2 bellard
    }
334 158142c2 bellard
    roundBits = zSig & 0x7F;
335 bb98fe42 Andreas Färber
    if ( 0xFD <= (uint16_t) zExp ) {
336 158142c2 bellard
        if (    ( 0xFD < zExp )
337 158142c2 bellard
             || (    ( zExp == 0xFD )
338 bb98fe42 Andreas Färber
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
339 158142c2 bellard
           ) {
340 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
341 f090c9d4 pbrook
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
342 158142c2 bellard
        }
343 158142c2 bellard
        if ( zExp < 0 ) {
344 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
345 158142c2 bellard
            isTiny =
346 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
347 158142c2 bellard
                || ( zExp < -1 )
348 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
349 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
350 158142c2 bellard
            zExp = 0;
351 158142c2 bellard
            roundBits = zSig & 0x7F;
352 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
353 158142c2 bellard
        }
354 158142c2 bellard
    }
355 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
356 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
357 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
358 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
359 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
360 158142c2 bellard
361 158142c2 bellard
}
362 158142c2 bellard
363 158142c2 bellard
/*----------------------------------------------------------------------------
364 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
365 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
366 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
367 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
368 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
369 158142c2 bellard
| floating-point exponent.
370 158142c2 bellard
*----------------------------------------------------------------------------*/
371 158142c2 bellard
372 158142c2 bellard
static float32
373 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
374 158142c2 bellard
{
375 158142c2 bellard
    int8 shiftCount;
376 158142c2 bellard
377 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
378 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
379 158142c2 bellard
380 158142c2 bellard
}
381 158142c2 bellard
382 158142c2 bellard
/*----------------------------------------------------------------------------
383 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
384 158142c2 bellard
*----------------------------------------------------------------------------*/
385 158142c2 bellard
386 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat64Frac( float64 a )
387 158142c2 bellard
{
388 158142c2 bellard
389 f090c9d4 pbrook
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
390 158142c2 bellard
391 158142c2 bellard
}
392 158142c2 bellard
393 158142c2 bellard
/*----------------------------------------------------------------------------
394 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
395 158142c2 bellard
*----------------------------------------------------------------------------*/
396 158142c2 bellard
397 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
398 158142c2 bellard
{
399 158142c2 bellard
400 f090c9d4 pbrook
    return ( float64_val(a)>>52 ) & 0x7FF;
401 158142c2 bellard
402 158142c2 bellard
}
403 158142c2 bellard
404 158142c2 bellard
/*----------------------------------------------------------------------------
405 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
406 158142c2 bellard
*----------------------------------------------------------------------------*/
407 158142c2 bellard
408 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
409 158142c2 bellard
{
410 158142c2 bellard
411 f090c9d4 pbrook
    return float64_val(a)>>63;
412 158142c2 bellard
413 158142c2 bellard
}
414 158142c2 bellard
415 158142c2 bellard
/*----------------------------------------------------------------------------
416 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
417 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
418 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
419 37d18660 Peter Maydell
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
420 37d18660 Peter Maydell
{
421 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
422 37d18660 Peter Maydell
        if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
423 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
424 37d18660 Peter Maydell
            return make_float64(float64_val(a) & (1ULL << 63));
425 37d18660 Peter Maydell
        }
426 37d18660 Peter Maydell
    }
427 37d18660 Peter Maydell
    return a;
428 37d18660 Peter Maydell
}
429 37d18660 Peter Maydell
430 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
431 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
432 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
433 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
434 158142c2 bellard
| `zSigPtr', respectively.
435 158142c2 bellard
*----------------------------------------------------------------------------*/
436 158142c2 bellard
437 158142c2 bellard
static void
438 bb98fe42 Andreas Färber
 normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
439 158142c2 bellard
{
440 158142c2 bellard
    int8 shiftCount;
441 158142c2 bellard
442 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
443 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
444 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
445 158142c2 bellard
446 158142c2 bellard
}
447 158142c2 bellard
448 158142c2 bellard
/*----------------------------------------------------------------------------
449 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
450 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
451 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
452 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
453 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
454 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
455 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
456 158142c2 bellard
| significand.
457 158142c2 bellard
*----------------------------------------------------------------------------*/
458 158142c2 bellard
459 bb98fe42 Andreas Färber
INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
460 158142c2 bellard
{
461 158142c2 bellard
462 f090c9d4 pbrook
    return make_float64(
463 bb98fe42 Andreas Färber
        ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
464 158142c2 bellard
465 158142c2 bellard
}
466 158142c2 bellard
467 158142c2 bellard
/*----------------------------------------------------------------------------
468 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
469 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
470 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
471 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
472 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
473 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
474 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
475 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
476 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
477 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
478 158142c2 bellard
| precision floating-point number.
479 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
480 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
481 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
482 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
483 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
484 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
485 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
486 158142c2 bellard
| Binary Floating-Point Arithmetic.
487 158142c2 bellard
*----------------------------------------------------------------------------*/
488 158142c2 bellard
489 bb98fe42 Andreas Färber
static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int16 increment, discarded;
    flag tiny;

    /* Pick the amount added below bit 10 before the low 10 bits are dropped:
     * half of 0x400 for round-to-nearest, nothing when the mode truncates
     * this sign, and 0x3FF when any nonzero dropped bits must carry. */
    mode = STATUS(float_rounding_mode);
    nearestEven = (mode == float_round_nearest_even);
    if (nearestEven) {
        increment = 0x200;
    } else if (mode == float_round_to_zero) {
        increment = 0;
    } else if (mode == (zSign ? float_round_up : float_round_down)) {
        increment = 0;
    } else {
        increment = 0x3FF;
    }

    discarded = zSig & 0x3FF;

    /* The 16-bit unsigned compare catches both ends at once: zExp at or
     * above 0x7FD, and (for values in int16_t range) negative zExp. */
    if (0x7FD <= (uint16_t) zExp) {
        if ((0x7FD < zExp)
            || ((zExp == 0x7FD)
                && ((int64_t) (zSig + increment) < 0))) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* Significand argument is all ones when nothing is added and
             * zero otherwise; presumably this selects the maximal finite
             * value versus infinity inside packFloat64. */
            return packFloat64(zSign, 0x7FF, -(increment == 0));
        }
        if (zExp < 0) {
            if (STATUS(flush_to_zero)) {
                return packFloat64(zSign, 0, 0);
            }
            tiny = (STATUS(float_detect_tininess) == float_tininess_before_rounding)
                || (zExp < -1)
                || (zSig + increment < LIT64( 0x8000000000000000 ));
            shift64RightJamming(zSig, -zExp, &zSig);
            zExp = 0;
            discarded = zSig & 0x3FF;
            if (tiny && discarded) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }

    if (discarded) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = (zSig + increment) >> 10;
    /* Exact tie under round-to-nearest-even: force the result's low bit
     * to zero. */
    if (nearestEven && discarded == 0x200) {
        zSig &= ~(uint64_t) 1;
    }
    if (zSig == 0) {
        zExp = 0;
    }
    return packFloat64(zSign, zExp, zSig);
}
541 158142c2 bellard
542 158142c2 bellard
/*----------------------------------------------------------------------------
543 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
544 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
545 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
546 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
547 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
548 158142c2 bellard
| floating-point exponent.
549 158142c2 bellard
*----------------------------------------------------------------------------*/
550 158142c2 bellard
551 158142c2 bellard
static float64
552 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
553 158142c2 bellard
{
554 158142c2 bellard
    int8 shiftCount;
555 158142c2 bellard
556 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
557 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
558 158142c2 bellard
559 158142c2 bellard
}
560 158142c2 bellard
561 158142c2 bellard
#ifdef FLOATX80
562 158142c2 bellard
563 158142c2 bellard
/*----------------------------------------------------------------------------
564 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
565 158142c2 bellard
| value `a'.
566 158142c2 bellard
*----------------------------------------------------------------------------*/
567 158142c2 bellard
568 bb98fe42 Andreas Färber
INLINE uint64_t extractFloatx80Frac( floatx80 a )
{
    /* The significand is the `low' member of the value. */
    uint64_t significand = a.low;

    return significand;
}
574 158142c2 bellard
575 158142c2 bellard
/*----------------------------------------------------------------------------
576 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
577 158142c2 bellard
| value `a'.
578 158142c2 bellard
*----------------------------------------------------------------------------*/
579 158142c2 bellard
580 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
{
    /* The exponent field is the low 15 bits of the `high' member. */
    int32 expField = a.high & 0x7FFF;

    return expField;
}
586 158142c2 bellard
587 158142c2 bellard
/*----------------------------------------------------------------------------
588 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
589 158142c2 bellard
| `a'.
590 158142c2 bellard
*----------------------------------------------------------------------------*/
591 158142c2 bellard
592 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
{
    /* The sign is whatever remains of the `high' member above bit 14. */
    flag signBit = a.high >> 15;

    return signBit;
}
598 158142c2 bellard
599 158142c2 bellard
/*----------------------------------------------------------------------------
600 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
601 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
602 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
603 158142c2 bellard
| `zSigPtr', respectively.
604 158142c2 bellard
*----------------------------------------------------------------------------*/
605 158142c2 bellard
606 158142c2 bellard
static void
607 bb98fe42 Andreas Färber
 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
608 158142c2 bellard
{
609 158142c2 bellard
    int8 shiftCount;
610 158142c2 bellard
611 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
612 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
613 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
614 158142c2 bellard
615 158142c2 bellard
}
616 158142c2 bellard
617 158142c2 bellard
/*----------------------------------------------------------------------------
618 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
619 158142c2 bellard
| extended double-precision floating-point value, returning the result.
620 158142c2 bellard
*----------------------------------------------------------------------------*/
621 158142c2 bellard
622 bb98fe42 Andreas Färber
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
{
    floatx80 packed;

    /* Sign goes in bit 15 of `high', with the exponent added below it. */
    packed.high = (((uint16_t) zSign) << 15) + zExp;
    packed.low = zSig;
    return packed;
}
631 158142c2 bellard
632 158142c2 bellard
/*----------------------------------------------------------------------------
633 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
634 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
635 158142c2 bellard
| and returns the proper extended double-precision floating-point value
636 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
637 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
638 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
639 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
640 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
641 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
642 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
643 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
644 158142c2 bellard
| double-precision floating-point number.
645 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
646 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
647 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
648 158142c2 bellard
| format.
649 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
650 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
651 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
652 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
653 158142c2 bellard
| Floating-Point Arithmetic.
654 158142c2 bellard
*----------------------------------------------------------------------------*/
655 158142c2 bellard
656 158142c2 bellard
/* Rounds the significand zSig0:zSig1 according to STATUS(float_rounding_mode)
 * and `roundingPrecision', then packs the result with packFloatx80.
 * Precisions 64 and 32 are rounded inside zSig0 using a bit mask; every other
 * value of `roundingPrecision' takes the full-width path at `precision80'. */
static floatx80
657 158142c2 bellard
 roundAndPackFloatx80(
658 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
659 158142c2 bellard
 STATUS_PARAM)
660 158142c2 bellard
{
661 158142c2 bellard
    int8 roundingMode;
662 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
663 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
664 158142c2 bellard
665 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
666 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
667 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
    /* roundMask covers the low bits of zSig0 that are dropped at the chosen
     * precision; roundIncrement starts out as half of ( roundMask + 1 ). */
668 158142c2 bellard
    if ( roundingPrecision == 64 ) {
669 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
670 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
671 158142c2 bellard
    }
672 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
673 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
674 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
675 158142c2 bellard
    }
676 158142c2 bellard
    else {
677 158142c2 bellard
        goto precision80;
678 158142c2 bellard
    }
    /* Fold any nonzero zSig1 into the lowest bit of zSig0 as a sticky bit. */
679 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
    /* Directed modes: add nothing when this sign is truncated, otherwise add
     * roundMask so that any nonzero dropped bits produce a carry. */
680 158142c2 bellard
    if ( ! roundNearestEven ) {
681 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
682 158142c2 bellard
            roundIncrement = 0;
683 158142c2 bellard
        }
684 158142c2 bellard
        else {
685 158142c2 bellard
            roundIncrement = roundMask;
686 158142c2 bellard
            if ( zSign ) {
687 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
688 158142c2 bellard
            }
689 158142c2 bellard
            else {
690 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
691 158142c2 bellard
            }
692 158142c2 bellard
        }
693 158142c2 bellard
    }
694 158142c2 bellard
    roundBits = zSig0 & roundMask;
    /* Unsigned compare: true when zExp <= 0 or zExp >= 0x7FFE. */
695 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        /* `zSig0 + roundIncrement < zSig0' detects the rounding addition
         * wrapping past bit 63. */
696 158142c2 bellard
        if (    ( 0x7FFE < zExp )
697 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
698 158142c2 bellard
           ) {
699 158142c2 bellard
            goto overflow;
700 158142c2 bellard
        }
701 158142c2 bellard
        if ( zExp <= 0 ) {
702 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
703 158142c2 bellard
            isTiny =
704 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
705 158142c2 bellard
                || ( zExp < 0 )
706 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
            /* Shift right by 1 - zExp, then recompute the dropped bits from
             * the shifted significand. */
707 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
708 158142c2 bellard
            zExp = 0;
709 158142c2 bellard
            roundBits = zSig0 & roundMask;
710 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
711 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
712 158142c2 bellard
            zSig0 += roundIncrement;
            /* Rounding set bit 63 of the significand: exponent becomes 1. */
713 bb98fe42 Andreas Färber
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            /* Exact tie under nearest-even: widen the mask by one bit so the
             * lowest retained bit is cleared as well. */
714 158142c2 bellard
            roundIncrement = roundMask + 1;
715 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
716 158142c2 bellard
                roundMask |= roundIncrement;
717 158142c2 bellard
            }
718 158142c2 bellard
            zSig0 &= ~ roundMask;
719 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
720 158142c2 bellard
        }
721 158142c2 bellard
    }
722 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
723 158142c2 bellard
    zSig0 += roundIncrement;
    /* The addition wrapped: bump the exponent and reset the significand to
     * just its top bit. */
724 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
725 158142c2 bellard
        ++zExp;
726 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
727 158142c2 bellard
    }
    /* Same tie handling as in the subnormal branch above. */
728 158142c2 bellard
    roundIncrement = roundMask + 1;
729 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
730 158142c2 bellard
        roundMask |= roundIncrement;
731 158142c2 bellard
    }
732 158142c2 bellard
    zSig0 &= ~ roundMask;
733 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
734 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
    /* Full-width path: all of zSig0 is kept and zSig1 holds the bits below
     * it; the top bit of zSig1 decides the nearest-even increment. */
735 158142c2 bellard
 precision80:
736 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig1 < 0 );
737 158142c2 bellard
    if ( ! roundNearestEven ) {
738 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
739 158142c2 bellard
            increment = 0;
740 158142c2 bellard
        }
741 158142c2 bellard
        else {
742 158142c2 bellard
            if ( zSign ) {
743 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
744 158142c2 bellard
            }
745 158142c2 bellard
            else {
746 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
747 158142c2 bellard
            }
748 158142c2 bellard
        }
749 158142c2 bellard
    }
    /* Same unsigned range test as above: zExp <= 0 or zExp >= 0x7FFE. */
750 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
751 158142c2 bellard
        if (    ( 0x7FFE < zExp )
752 158142c2 bellard
             || (    ( zExp == 0x7FFE )
753 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
754 158142c2 bellard
                  && increment
755 158142c2 bellard
                )
756 158142c2 bellard
           ) {
757 158142c2 bellard
            roundMask = 0;
            /* Also entered by `goto overflow' from the reduced-precision
             * path, in which case roundMask still holds that precision's
             * mask and ~ roundMask below keeps only the retained bits. */
758 158142c2 bellard
 overflow:
759 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
760 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
761 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
762 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
763 158142c2 bellard
               ) {
764 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
765 158142c2 bellard
            }
766 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
767 158142c2 bellard
        }
        /* NOTE(review): unlike the reduced-precision path, this branch does
         * not consult STATUS(flush_to_zero) -- confirm that is intentional. */
768 158142c2 bellard
        if ( zExp <= 0 ) {
769 158142c2 bellard
            isTiny =
770 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
771 158142c2 bellard
                || ( zExp < 0 )
772 158142c2 bellard
                || ! increment
773 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
774 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
775 158142c2 bellard
            zExp = 0;
776 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
777 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
            /* zSig1 changed in the shift, so the increment decision is
             * recomputed from the new value. */
778 158142c2 bellard
            if ( roundNearestEven ) {
779 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig1 < 0 );
780 158142c2 bellard
            }
781 158142c2 bellard
            else {
782 158142c2 bellard
                if ( zSign ) {
783 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
784 158142c2 bellard
                }
785 158142c2 bellard
                else {
786 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
787 158142c2 bellard
                }
788 158142c2 bellard
            }
789 158142c2 bellard
            if ( increment ) {
790 158142c2 bellard
                ++zSig0;
                /* Clears bit 0 of zSig0 when zSig1<<1 is zero and the mode
                 * is nearest-even (the exact-tie case). */
791 158142c2 bellard
                zSig0 &=
792 bb98fe42 Andreas Färber
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
793 bb98fe42 Andreas Färber
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
794 158142c2 bellard
            }
795 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
796 158142c2 bellard
        }
797 158142c2 bellard
    }
798 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
799 158142c2 bellard
    if ( increment ) {
800 158142c2 bellard
        ++zSig0;
        /* zSig0 wrapped to zero: bump the exponent and reset the significand
         * to just its top bit. */
801 158142c2 bellard
        if ( zSig0 == 0 ) {
802 158142c2 bellard
            ++zExp;
803 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
804 158142c2 bellard
        }
805 158142c2 bellard
        else {
806 bb98fe42 Andreas Färber
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
807 158142c2 bellard
        }
808 158142c2 bellard
    }
809 158142c2 bellard
    else {
810 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
811 158142c2 bellard
    }
812 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
813 158142c2 bellard
814 158142c2 bellard
}
815 158142c2 bellard
816 158142c2 bellard
/*----------------------------------------------------------------------------
817 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
818 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
819 158142c2 bellard
| and returns the proper extended double-precision floating-point value
820 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
821 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
822 158142c2 bellard
| normalized.
823 158142c2 bellard
*----------------------------------------------------------------------------*/
824 158142c2 bellard
825 158142c2 bellard
static floatx80
826 158142c2 bellard
 normalizeRoundAndPackFloatx80(
827 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
828 158142c2 bellard
 STATUS_PARAM)
829 158142c2 bellard
{
830 158142c2 bellard
    int8 shiftCount;
831 158142c2 bellard
832 158142c2 bellard
    if ( zSig0 == 0 ) {
833 158142c2 bellard
        zSig0 = zSig1;
834 158142c2 bellard
        zSig1 = 0;
835 158142c2 bellard
        zExp -= 64;
836 158142c2 bellard
    }
837 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
838 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
839 158142c2 bellard
    zExp -= shiftCount;
840 158142c2 bellard
    return
841 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
842 158142c2 bellard
843 158142c2 bellard
}
844 158142c2 bellard
845 158142c2 bellard
#endif
846 158142c2 bellard
847 158142c2 bellard
#ifdef FLOAT128
848 158142c2 bellard
849 158142c2 bellard
/*----------------------------------------------------------------------------
850 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
851 158142c2 bellard
| floating-point value `a'.
852 158142c2 bellard
*----------------------------------------------------------------------------*/
853 158142c2 bellard
854 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac1( float128 a )
{
    /* The low fraction word is the `low' member, unmodified. */
    uint64_t lowWord = a.low;

    return lowWord;
}
860 158142c2 bellard
861 158142c2 bellard
/*----------------------------------------------------------------------------
862 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
863 158142c2 bellard
| floating-point value `a'.
864 158142c2 bellard
*----------------------------------------------------------------------------*/
865 158142c2 bellard
866 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac0( float128 a )
{
    /* Keep only the low 48 bits of the `high' member. */
    uint64_t fracMask = LIT64( 0x0000FFFFFFFFFFFF );

    return a.high & fracMask;
}
872 158142c2 bellard
873 158142c2 bellard
/*----------------------------------------------------------------------------
874 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
875 158142c2 bellard
| `a'.
876 158142c2 bellard
*----------------------------------------------------------------------------*/
877 158142c2 bellard
878 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
{
    /* Drop the 48 low bits of `high', then mask off everything above the
     * 15-bit exponent field. */
    uint64_t upperBits = a.high >> 48;

    return upperBits & 0x7FFF;
}
884 158142c2 bellard
885 158142c2 bellard
/*----------------------------------------------------------------------------
886 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
887 158142c2 bellard
*----------------------------------------------------------------------------*/
888 158142c2 bellard
889 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
{
    /* The sign is the topmost bit of the `high' member. */
    flag signBit = a.high >> 63;

    return signBit;
}
895 158142c2 bellard
896 158142c2 bellard
/*----------------------------------------------------------------------------
897 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
898 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
899 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
900 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
901 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
902 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
903 158142c2 bellard
| location pointed to by `zSig1Ptr'.
904 158142c2 bellard
*----------------------------------------------------------------------------*/
905 158142c2 bellard
906 158142c2 bellard
static void
907 158142c2 bellard
 normalizeFloat128Subnormal(
908 bb98fe42 Andreas Färber
     uint64_t aSig0,
909 bb98fe42 Andreas Färber
     uint64_t aSig1,
910 158142c2 bellard
     int32 *zExpPtr,
911 bb98fe42 Andreas Färber
     uint64_t *zSig0Ptr,
912 bb98fe42 Andreas Färber
     uint64_t *zSig1Ptr
913 158142c2 bellard
 )
914 158142c2 bellard
{
915 158142c2 bellard
    int8 shiftCount;
916 158142c2 bellard
917 158142c2 bellard
    if ( aSig0 == 0 ) {
918 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
919 158142c2 bellard
        if ( shiftCount < 0 ) {
920 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
921 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
922 158142c2 bellard
        }
923 158142c2 bellard
        else {
924 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
925 158142c2 bellard
            *zSig1Ptr = 0;
926 158142c2 bellard
        }
927 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
928 158142c2 bellard
    }
929 158142c2 bellard
    else {
930 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
931 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
932 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
933 158142c2 bellard
    }
934 158142c2 bellard
935 158142c2 bellard
}
936 158142c2 bellard
937 158142c2 bellard
/*----------------------------------------------------------------------------
938 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
939 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
940 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
941 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
942 158142c2 bellard
| added together to form the most significant 64 bits of the result.  This
943 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
944 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
945 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
946 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
947 158142c2 bellard
| significand.
948 158142c2 bellard
*----------------------------------------------------------------------------*/
949 158142c2 bellard
950 158142c2 bellard
INLINE float128
951 bb98fe42 Andreas Färber
 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
952 158142c2 bellard
{
953 158142c2 bellard
    float128 z;
954 158142c2 bellard
955 158142c2 bellard
    z.low = zSig1;
956 bb98fe42 Andreas Färber
    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
957 158142c2 bellard
    return z;
958 158142c2 bellard
959 158142c2 bellard
}
960 158142c2 bellard
961 158142c2 bellard
/*----------------------------------------------------------------------------
962 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
963 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
964 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
965 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
966 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
967 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
968 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
969 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
970 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
971 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
972 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
973 158142c2 bellard
| precision floating-point number.
974 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
975 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
976 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
977 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
978 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
979 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
980 158142c2 bellard
*----------------------------------------------------------------------------*/
981 158142c2 bellard
982 158142c2 bellard
static float128
983 158142c2 bellard
 roundAndPackFloat128(
984 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
985 158142c2 bellard
{
986 158142c2 bellard
    int8 roundingMode;
987 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
988 158142c2 bellard
989 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
990 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
991 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig2 < 0 );
992 158142c2 bellard
    if ( ! roundNearestEven ) {
993 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
994 158142c2 bellard
            increment = 0;
995 158142c2 bellard
        }
996 158142c2 bellard
        else {
997 158142c2 bellard
            if ( zSign ) {
998 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
999 158142c2 bellard
            }
1000 158142c2 bellard
            else {
1001 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
1002 158142c2 bellard
            }
1003 158142c2 bellard
        }
1004 158142c2 bellard
    }
1005 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) zExp ) {
1006 158142c2 bellard
        if (    ( 0x7FFD < zExp )
1007 158142c2 bellard
             || (    ( zExp == 0x7FFD )
1008 158142c2 bellard
                  && eq128(
1009 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
1010 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1011 158142c2 bellard
                         zSig0,
1012 158142c2 bellard
                         zSig1
1013 158142c2 bellard
                     )
1014 158142c2 bellard
                  && increment
1015 158142c2 bellard
                )
1016 158142c2 bellard
           ) {
1017 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1018 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
1019 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
1020 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
1021 158142c2 bellard
               ) {
1022 158142c2 bellard
                return
1023 158142c2 bellard
                    packFloat128(
1024 158142c2 bellard
                        zSign,
1025 158142c2 bellard
                        0x7FFE,
1026 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
1027 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
1028 158142c2 bellard
                    );
1029 158142c2 bellard
            }
1030 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
1031 158142c2 bellard
        }
1032 158142c2 bellard
        if ( zExp < 0 ) {
1033 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
1034 158142c2 bellard
            isTiny =
1035 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1036 158142c2 bellard
                || ( zExp < -1 )
1037 158142c2 bellard
                || ! increment
1038 158142c2 bellard
                || lt128(
1039 158142c2 bellard
                       zSig0,
1040 158142c2 bellard
                       zSig1,
1041 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
1042 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
1043 158142c2 bellard
                   );
1044 158142c2 bellard
            shift128ExtraRightJamming(
1045 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1046 158142c2 bellard
            zExp = 0;
1047 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1048 158142c2 bellard
            if ( roundNearestEven ) {
1049 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig2 < 0 );
1050 158142c2 bellard
            }
1051 158142c2 bellard
            else {
1052 158142c2 bellard
                if ( zSign ) {
1053 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
1054 158142c2 bellard
                }
1055 158142c2 bellard
                else {
1056 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
1057 158142c2 bellard
                }
1058 158142c2 bellard
            }
1059 158142c2 bellard
        }
1060 158142c2 bellard
    }
1061 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1062 158142c2 bellard
    if ( increment ) {
1063 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1064 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1065 158142c2 bellard
    }
1066 158142c2 bellard
    else {
1067 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1068 158142c2 bellard
    }
1069 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1070 158142c2 bellard
1071 158142c2 bellard
}
1072 158142c2 bellard
1073 158142c2 bellard
/*----------------------------------------------------------------------------
1074 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1075 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1076 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1077 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1078 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1079 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1080 158142c2 bellard
| point exponent.
1081 158142c2 bellard
*----------------------------------------------------------------------------*/
1082 158142c2 bellard
1083 158142c2 bellard
static float128
1084 158142c2 bellard
 normalizeRoundAndPackFloat128(
1085 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1086 158142c2 bellard
{
1087 158142c2 bellard
    int8 shiftCount;
1088 bb98fe42 Andreas Färber
    uint64_t zSig2;
1089 158142c2 bellard
1090 158142c2 bellard
    if ( zSig0 == 0 ) {
1091 158142c2 bellard
        zSig0 = zSig1;
1092 158142c2 bellard
        zSig1 = 0;
1093 158142c2 bellard
        zExp -= 64;
1094 158142c2 bellard
    }
1095 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1096 158142c2 bellard
    if ( 0 <= shiftCount ) {
1097 158142c2 bellard
        zSig2 = 0;
1098 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1099 158142c2 bellard
    }
1100 158142c2 bellard
    else {
1101 158142c2 bellard
        shift128ExtraRightJamming(
1102 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1103 158142c2 bellard
    }
1104 158142c2 bellard
    zExp -= shiftCount;
1105 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1106 158142c2 bellard
1107 158142c2 bellard
}
1108 158142c2 bellard
1109 158142c2 bellard
#endif
1110 158142c2 bellard
1111 158142c2 bellard
/*----------------------------------------------------------------------------
1112 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1113 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1114 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1115 158142c2 bellard
*----------------------------------------------------------------------------*/
1116 158142c2 bellard
1117 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    if ( ! a ) {
        return float32_zero;
    }
    /* -2^31 cannot be negated as an int32; pack it directly (exp 0x9E). */
    if ( a == (int32_t) 0x80000000 ) {
        return packFloat32( 1, 0x9E, 0 );
    }
    if ( a < 0 ) {
        return normalizeRoundAndPackFloat32( 1, 0x9C, - a STATUS_VAR );
    }
    return normalizeRoundAndPackFloat32( 0, 0x9C, a STATUS_VAR );

}
1127 158142c2 bellard
1128 158142c2 bellard
/*----------------------------------------------------------------------------
1129 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1130 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1131 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1132 158142c2 bellard
*----------------------------------------------------------------------------*/
1133 158142c2 bellard
1134 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Every 32-bit magnitude fits in the result, so no rounding is needed:
       shift the value up and pack it directly. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1149 158142c2 bellard
1150 158142c2 bellard
#ifdef FLOATX80
1151 158142c2 bellard
1152 158142c2 bellard
/*----------------------------------------------------------------------------
1153 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1154 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1155 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1156 158142c2 bellard
| Arithmetic.
1157 158142c2 bellard
*----------------------------------------------------------------------------*/
1158 158142c2 bellard
1159 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Move the leading one of the magnitude up to bit 63 of the 64-bit
       significand. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1174 158142c2 bellard
1175 158142c2 bellard
#endif
1176 158142c2 bellard
1177 158142c2 bellard
#ifdef FLOAT128
1178 158142c2 bellard
1179 158142c2 bellard
/*----------------------------------------------------------------------------
1180 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1181 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1182 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1183 158142c2 bellard
*----------------------------------------------------------------------------*/
1184 158142c2 bellard
1185 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* The value fits entirely in the high significand word; the low word
       is zero. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1200 158142c2 bellard
1201 158142c2 bellard
#endif
1202 158142c2 bellard
1203 158142c2 bellard
/*----------------------------------------------------------------------------
1204 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1205 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1206 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1207 158142c2 bellard
*----------------------------------------------------------------------------*/
1208 158142c2 bellard
1209 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At most 24 significant bits: exact, pack without rounding. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* Too wide for the significand: align for the rounding routine,
           jamming any bits shifted out so inexactness is not lost. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1234 158142c2 bellard
1235 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  The result is never negative.
| Values of fewer than 25 significant bits are packed exactly; wider values
| are rounded by `roundAndPackFloat32'.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* Sign is 0: an unsigned input can only give a non-negative result
           (the sign argument used to be `1 > 0', i.e. 1 = negative). */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        /* Align for the rounding routine, jamming any bits shifted out. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1255 75d62a58 j_mayer
1256 158142c2 bellard
/*----------------------------------------------------------------------------
1257 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1258 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1259 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1260 158142c2 bellard
*----------------------------------------------------------------------------*/
1261 158142c2 bellard
1262 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    if ( ! a ) {
        return float64_zero;
    }
    /* -2^63 cannot be negated as an int64; pack it directly (exp 0x43E). */
    if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
        return packFloat64( 1, 0x43E, 0 );
    }
    if ( a < 0 ) {
        return normalizeRoundAndPackFloat64( 1, 0x43C, - a STATUS_VAR );
    }
    return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );

}
1274 158142c2 bellard
1275 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int zExp = 0x43C;

    if ( a == 0 ) return float64_zero;
    /* With exponent 0x43C the normalizing helper is only ever handed values
       below 2^63 by `int64_to_float64', which special-cases 2^63 itself.
       NOTE(review): the helper is presumed to need bit 63 clear -- confirm
       against `normalizeRoundAndPackFloat64'.  When bit 63 is set, shift
       right by one, OR-ing the lost bit back in (jamming) so rounding still
       sees it, and raise the exponent to compensate. */
    if ( a>>63 ) {
        a = ( a>>1 ) | ( a & 1 );
        zExp += 1;
    }
    return normalizeRoundAndPackFloat64( 0, zExp, a STATUS_VAR );

}
1281 75d62a58 j_mayer
1282 158142c2 bellard
#ifdef FLOATX80
1283 158142c2 bellard
1284 158142c2 bellard
/*----------------------------------------------------------------------------
1285 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1286 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1287 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1288 158142c2 bellard
| Arithmetic.
1289 158142c2 bellard
*----------------------------------------------------------------------------*/
1290 158142c2 bellard
1291 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    /* Move the leading one up to bit 63 of the 64-bit significand. */
    shiftCount = countLeadingZeros64( absA );
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1304 158142c2 bellard
1305 158142c2 bellard
#endif
1306 158142c2 bellard
1307 158142c2 bellard
#ifdef FLOAT128
1308 158142c2 bellard
1309 158142c2 bellard
/*----------------------------------------------------------------------------
1310 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1311 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1312 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1313 158142c2 bellard
*----------------------------------------------------------------------------*/
1314 158142c2 bellard
1315 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    uint64_t zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: a signed `- a' overflows
       (undefined behaviour) when `a' is the most negative integer. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A shift of a whole word or more: start with the value already in
           the high word and shift by the remainder only. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1341 158142c2 bellard
1342 158142c2 bellard
#endif
1343 158142c2 bellard
1344 158142c2 bellard
/*----------------------------------------------------------------------------
1345 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1346 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1347 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1348 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1349 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1350 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1351 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1352 158142c2 bellard
*----------------------------------------------------------------------------*/
1353 158142c2 bellard
1354 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp, rightShift;
    uint32_t frac;
    uint64_t sig64;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    /* A NaN is treated as positive. */
    if ( ( biasedExp == 0xFF ) && frac ) {
        sign = 0;
    }
    /* Nonzero exponent field: make the implicit leading one explicit. */
    if ( biasedExp ) {
        frac |= 0x00800000;
    }
    rightShift = 0xAF - biasedExp;
    sig64 = ( (uint64_t) frac )<<32;
    if ( rightShift > 0 ) {
        shift64RightJamming( sig64, rightShift, &sig64 );
    }
    return roundAndPackInt32( sign, sig64 STATUS_VAR );

}
1374 158142c2 bellard
1375 158142c2 bellard
/*----------------------------------------------------------------------------
1376 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1377 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1378 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1379 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1380 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1381 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1382 158142c2 bellard
| returned.
1383 158142c2 bellard
*----------------------------------------------------------------------------*/
1384 158142c2 bellard
1385 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp, expOffset;
    uint32_t sig;
    int32 result;

    a = float32_squash_input_denormal(a STATUS_VAR);
    sig = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    expOffset = biasedExp - 0x9E;
    if ( expOffset >= 0 ) {
        /* Exponent too large for int32; only the bit pattern 0xCF000000
           converts without raising the invalid flag. */
        if ( float32_val(a) != 0xCF000000 ) {
            float_raise( float_flag_invalid STATUS_VAR);
            if ( ( ( biasedExp == 0xFF ) && sig ) || ! sign ) {
                return 0x7FFFFFFF;
            }
        }
        return (int32_t) 0x80000000;
    }
    if ( biasedExp <= 0x7E ) {
        /* Magnitude below one truncates to 0; inexact unless `a' is zero. */
        if ( biasedExp | sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    sig = ( sig | 0x00800000 )<<8;
    result = sig>>( - expOffset );
    /* Any bits dropped by the right shift make the result inexact. */
    if ( (uint32_t) ( sig<<( expOffset & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1417 158142c2 bellard
1418 158142c2 bellard
/*----------------------------------------------------------------------------
1419 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1420 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
1421 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1422 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
1423 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1424 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
1425 cbcef455 Peter Maydell
| returned.
1426 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
1427 cbcef455 Peter Maydell
1428 cbcef455 Peter Maydell
int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp, expOffset;
    uint32_t sig;
    int32 result;

    /* NOTE(review): unlike the other float32 conversions here, this one
       does not call float32_squash_input_denormal -- confirm intended. */
    sig = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    expOffset = biasedExp - 0x8E;
    if ( expOffset >= 0 ) {
        /* Exponent too large for int16; only the bit pattern 0xC7000000
           converts without raising the invalid flag. */
        if ( float32_val(a) != 0xC7000000 ) {
            float_raise( float_flag_invalid STATUS_VAR);
            if ( ( ( biasedExp == 0xFF ) && sig ) || ! sign ) {
                return 0x7FFF;
            }
        }
        return (int32_t) 0xffff8000;
    }
    if ( biasedExp <= 0x7E ) {
        /* Magnitude below one truncates to 0; inexact unless all bits of
           the exponent and fraction fields are zero. */
        if ( biasedExp | sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* The significand is held in a 32-bit word, so the shift is 16 more
       than the 16-bit target alone would need. */
    expOffset -= 0x10;
    sig = ( sig | 0x00800000 )<<8;
    result = sig>>( - expOffset );
    /* Any bits dropped by the right shift make the result inexact. */
    if ( (uint32_t) ( sig<<( expOffset & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1466 cbcef455 Peter Maydell
1467 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
1468 cbcef455 Peter Maydell
| Returns the result of converting the single-precision floating-point value
1469 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1470 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1471 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1472 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1473 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1474 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1475 158142c2 bellard
*----------------------------------------------------------------------------*/
1476 158142c2 bellard
1477 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp, rightShift;
    uint32_t frac;
    uint64_t sig64, sigExtra;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    rightShift = 0xBE - biasedExp;
    if ( rightShift < 0 ) {
        /* Out of range, infinity or NaN: invalid.  Only a negative non-NaN
           saturates to the most negative integer. */
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( biasedExp == 0xFF ) && frac ) ) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    /* Nonzero exponent field: make the implicit leading one explicit. */
    if ( biasedExp ) {
        frac |= 0x00800000;
    }
    sig64 = ( (uint64_t) frac )<<40;
    shift64ExtraRightJamming( sig64, 0, rightShift, &sig64, &sigExtra );
    return roundAndPackInt64( sign, sig64, sigExtra STATUS_VAR );

}
1503 158142c2 bellard
1504 158142c2 bellard
/*----------------------------------------------------------------------------
1505 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1506 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1507 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1508 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1509 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1510 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1511 158142c2 bellard
| returned.
1512 158142c2 bellard
*----------------------------------------------------------------------------*/
1513 158142c2 bellard
1514 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp, expOffset;
    uint32_t frac;
    uint64_t sig64;
    int64 result;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    expOffset = biasedExp - 0xBE;
    if ( expOffset >= 0 ) {
        /* Exponent too large for int64; only the bit pattern 0xDF000000
           converts without raising the invalid flag. */
        if ( float32_val(a) != 0xDF000000 ) {
            float_raise( float_flag_invalid STATUS_VAR);
            if ( ( ( biasedExp == 0xFF ) && frac ) || ! sign ) {
                return LIT64( 0x7FFFFFFFFFFFFFFF );
            }
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }
    if ( biasedExp <= 0x7E ) {
        /* Magnitude below one truncates to 0; inexact unless `a' is zero. */
        if ( biasedExp | frac ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    sig64 = frac | 0x00800000;
    sig64 <<= 40;
    result = sig64>>( - expOffset );
    /* Any bits dropped by the right shift make the result inexact. */
    if ( (uint64_t) ( sig64<<( expOffset & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1550 158142c2 bellard
1551 158142c2 bellard
/*----------------------------------------------------------------------------
1552 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1553 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1554 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1555 158142c2 bellard
| Arithmetic.
1556 158142c2 bellard
*----------------------------------------------------------------------------*/
1557 158142c2 bellard
1558 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    if ( biasedExp == 0xFF ) {
        /* NaN goes through the common-NaN converters; otherwise infinity. */
        if ( frac ) {
            return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        return packFloat64( sign, 0x7FF, 0 );
    }
    if ( ! biasedExp ) {
        if ( ! frac ) {
            return packFloat64( sign, 0, 0 );
        }
        /* Subnormal input: normalize it before re-biasing the exponent. */
        normalizeFloat32Subnormal( frac, &biasedExp, &frac );
        --biasedExp;
    }
    /* Re-bias the exponent and left-align the 23-bit fraction in the
       52-bit fraction field. */
    return packFloat64( sign, biasedExp + 0x380, ( (uint64_t) frac )<<29 );

}
1580 158142c2 bellard
1581 158142c2 bellard
#ifdef FLOATX80
1582 158142c2 bellard
1583 158142c2 bellard
/*----------------------------------------------------------------------------
1584 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1585 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1586 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1587 158142c2 bellard
| Arithmetic.
1588 158142c2 bellard
*----------------------------------------------------------------------------*/
1589 158142c2 bellard
1590 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    if ( biasedExp == 0xFF ) {
        /* NaN goes through the common-NaN converters; otherwise infinity. */
        if ( frac ) {
            return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( ! biasedExp ) {
        if ( ! frac ) {
            return packFloatx80( sign, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &biasedExp, &frac );
    }
    /* Set the leading one explicitly (bit 23 here, bit 63 once shifted). */
    frac |= 0x00800000;
    return packFloatx80( sign, biasedExp + 0x3F80, ( (uint64_t) frac )<<40 );

}
1612 158142c2 bellard
1613 158142c2 bellard
#endif
1614 158142c2 bellard
1615 158142c2 bellard
#ifdef FLOAT128
1616 158142c2 bellard
1617 158142c2 bellard
/*----------------------------------------------------------------------------
1618 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1619 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1620 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1621 158142c2 bellard
| Arithmetic.
1622 158142c2 bellard
*----------------------------------------------------------------------------*/
1623 158142c2 bellard
1624 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign;
    int16 biasedExp;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    biasedExp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    if ( biasedExp == 0xFF ) {
        /* NaN goes through the common-NaN converters; otherwise infinity. */
        if ( frac ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        return packFloat128( sign, 0x7FFF, 0, 0 );
    }
    if ( ! biasedExp ) {
        if ( ! frac ) {
            return packFloat128( sign, 0, 0, 0 );
        }
        /* Subnormal input: normalize it before re-biasing the exponent. */
        normalizeFloat32Subnormal( frac, &biasedExp, &frac );
        --biasedExp;
    }
    /* The fraction goes in the high significand word; the low word is 0. */
    return packFloat128( sign, biasedExp + 0x3F80, ( (uint64_t) frac )<<25, 0 );

}
1646 158142c2 bellard
1647 158142c2 bellard
#endif
1648 158142c2 bellard
1649 158142c2 bellard
/*----------------------------------------------------------------------------
1650 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1651 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1652 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1653 158142c2 bellard
| Floating-Point Arithmetic.
1654 158142c2 bellard
*----------------------------------------------------------------------------*/
1655 158142c2 bellard
1656 158142c2 bellard
float32 float32_round_to_int(float32 a STATUS_PARAM)
{
    int16 exp;
    int8 mode;
    flag sign;
    uint32_t unitBit, fracMask, bits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    exp = extractFloat32Exp(a);

    /*
     * Biased exponent >= 0x96 (0x7F + 23): no fraction bits remain below the
     * units place, so the value is returned unchanged (NaNs are propagated).
     */
    if (exp >= 0x96) {
        if (exp == 0xFF && extractFloat32Frac(a)) {
            return propagateFloat32NaN(a, a STATUS_VAR);
        }
        return a;
    }

    mode = STATUS(float_rounding_mode);

    /* Biased exponent <= 0x7E: magnitude below 1. */
    if (exp <= 0x7E) {
        /* Shifting out the sign bit leaves zero only for +/-0. */
        if ((uint32_t)(float32_val(a) << 1) == 0) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat32Sign(a);

        if (mode == float_round_nearest_even) {
            /* Strictly more than one half rounds to +/-1. */
            if (exp == 0x7E && extractFloat32Frac(a)) {
                return packFloat32(sign, 0x7F, 0);
            }
        } else if (mode == float_round_down) {
            /* 0xBF800000 is -1.0f. */
            return make_float32(sign ? 0xBF800000 : 0);
        } else if (mode == float_round_up) {
            /* 0x80000000 is -0.0f, 0x3F800000 is +1.0f. */
            return make_float32(sign ? 0x80000000 : 0x3F800000);
        }
        /* Every remaining case yields a zero carrying the input's sign. */
        return packFloat32(sign, 0, 0);
    }

    /* 0x7F <= exp <= 0x95: some, but not all, fraction bits are fractional. */
    unitBit = (uint32_t)1 << (0x96 - exp);
    fracMask = unitBit - 1;
    bits = float32_val(a);

    if (mode == float_round_nearest_even) {
        bits += unitBit >> 1;
        /* Exact tie: clear the units bit so the result is even. */
        if ((bits & fracMask) == 0) {
            bits &= ~unitBit;
        }
    } else if (mode != float_round_to_zero) {
        /* Directed rounding bumps the magnitude only when moving away from 0. */
        if (extractFloat32Sign(make_float32(bits))
            != (mode == float_round_up)) {
            bits += fracMask;
        }
    }
    bits &= ~fracMask;

    if (bits != float32_val(a)) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float32(bits);
}
1708 158142c2 bellard
1709 158142c2 bellard
/*----------------------------------------------------------------------------
1710 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1711 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1712 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1713 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1714 158142c2 bellard
| Floating-Point Arithmetic.
1715 158142c2 bellard
*----------------------------------------------------------------------------*/
1716 158142c2 bellard
1717 158142c2 bellard
static float32 addFloat32Sigs(float32 a, float32 b, flag zSign STATUS_PARAM)
{
    uint32_t sigA, sigB, sum;
    int16 expA, expB, expZ;
    int16 shift;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    shift = expA - expB;

    /* Make room for guard bits below the significands. */
    sigA <<= 6;
    sigB <<= 6;

    if (shift == 0) {
        /* Equal exponents: no alignment needed. */
        if (expA == 0xFF) {
            if (sigA | sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expA == 0) {
            /* Both operands are zero or subnormal. */
            if (STATUS(flush_to_zero)) {
                return packFloat32(zSign, 0, 0);
            }
            return packFloat32(zSign, 0, (sigA + sigB) >> 6);
        }
        /* 0x40000000 accounts for the two implicit leading ones. */
        return roundAndPackFloat32(zSign, expA,
                                   0x40000000 + sigA + sigB STATUS_VAR);
    }

    if (shift > 0) {
        /* a has the larger exponent: align b down to it. */
        if (expA == 0xFF) {
            if (sigA) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            --shift;
        } else {
            sigB |= 0x20000000;
        }
        shift32RightJamming(sigB, shift, &sigB);
        expZ = expA;
    } else {
        /* b has the larger exponent: align a down to it. */
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign, 0xFF, 0);
        }
        if (expA == 0) {
            ++shift;
        } else {
            sigA |= 0x20000000;
        }
        shift32RightJamming(sigA, -shift, &sigA);
        expZ = expB;
    }

    /* Implicit leading one of the larger-exponent operand. */
    sigA |= 0x20000000;
    sum = sigA + sigB;
    /* Without a carry out, shift the sum up one place and drop the exponent. */
    if ((int32_t)(sum << 1) >= 0) {
        sum <<= 1;
        --expZ;
    }
    return roundAndPackFloat32(zSign, expZ, sum STATUS_VAR);
}
1782 158142c2 bellard
1783 158142c2 bellard
/*----------------------------------------------------------------------------
1784 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1785 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1786 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1787 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1788 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1789 158142c2 bellard
*----------------------------------------------------------------------------*/
1790 158142c2 bellard
1791 158142c2 bellard
static float32 subFloat32Sigs(float32 a, float32 b, flag zSign STATUS_PARAM)
{
    uint32_t sigA, sigB, diff;
    int16 expA, expB, expZ;
    int16 shift;
    flag aIsLarger;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    shift = expA - expB;

    /* Make room for guard bits below the significands. */
    sigA <<= 7;
    sigB <<= 7;

    if (shift > 0) {
        /* a has the larger exponent, hence the larger magnitude. */
        if (expA == 0xFF) {
            if (sigA) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            --shift;
        } else {
            sigB |= 0x40000000;
        }
        shift32RightJamming(sigB, shift, &sigB);
        sigA |= 0x40000000;
        aIsLarger = 1;
    } else if (shift < 0) {
        /* b has the larger exponent, hence the larger magnitude. */
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign ^ 1, 0xFF, 0);
        }
        if (expA == 0) {
            ++shift;
        } else {
            sigA |= 0x40000000;
        }
        shift32RightJamming(sigA, -shift, &sigA);
        sigB |= 0x40000000;
        aIsLarger = 0;
    } else {
        /* Equal exponents. */
        if (expA == 0xFF) {
            if (sigA | sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Infinity minus infinity is an invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        if (expA == 0) {
            expA = 1;
            expB = 1;
        }
        if (sigA == sigB) {
            /* Exact cancellation: zero is negative only when rounding down. */
            return packFloat32(STATUS(float_rounding_mode) == float_round_down,
                               0, 0);
        }
        aIsLarger = (sigB < sigA);
    }

    if (aIsLarger) {
        diff = sigA - sigB;
        expZ = expA;
    } else {
        /* |b| > |a|: the difference takes the opposite sign. */
        diff = sigB - sigA;
        expZ = expB;
        zSign ^= 1;
    }
    --expZ;
    return normalizeRoundAndPackFloat32(zSign, expZ, diff STATUS_VAR);
}
1857 158142c2 bellard
1858 158142c2 bellard
/*----------------------------------------------------------------------------
1859 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1860 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1861 158142c2 bellard
| Binary Floating-Point Arithmetic.
1862 158142c2 bellard
*----------------------------------------------------------------------------*/
1863 158142c2 bellard
1864 158142c2 bellard
float32 float32_add(float32 a, float32 b STATUS_PARAM)
{
    flag signA, signB;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);

    /* Opposite signs: the sum is a difference of magnitudes. */
    if (signA != signB) {
        return subFloat32Sigs(a, b, signA STATUS_VAR);
    }
    return addFloat32Sigs(a, b, signA STATUS_VAR);
}
1880 158142c2 bellard
1881 158142c2 bellard
/*----------------------------------------------------------------------------
1882 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1883 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1884 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1885 158142c2 bellard
*----------------------------------------------------------------------------*/
1886 158142c2 bellard
1887 158142c2 bellard
float32 float32_sub(float32 a, float32 b STATUS_PARAM)
{
    flag signA, signB;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);

    /* Opposite signs: the difference is a sum of magnitudes. */
    if (signA != signB) {
        return addFloat32Sigs(a, b, signA STATUS_VAR);
    }
    return subFloat32Sigs(a, b, signA STATUS_VAR);
}
1903 158142c2 bellard
1904 158142c2 bellard
/*----------------------------------------------------------------------------
1905 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1906 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1907 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1908 158142c2 bellard
*----------------------------------------------------------------------------*/
1909 158142c2 bellard
1910 158142c2 bellard
float32 float32_mul(float32 a, float32 b STATUS_PARAM)
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint32_t sigA, sigB;
    uint64_t product;
    uint32_t sigZ;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    if (expA == 0xFF) {
        /* a is infinity or NaN. */
        if (sigA || (expB == 0xFF && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if ((expB | sigB) == 0) {
            /* Infinity times zero is an invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }
    if (expB == 0xFF) {
        /* b is infinity or NaN; a is finite. */
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if ((expA | sigA) == 0) {
            /* Zero times infinity is an invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }

    /* A zero factor gives a signed zero; subnormals are normalized first. */
    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }
    if (expB == 0) {
        if (sigB == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }

    expZ = expA + expB - 0x7F;
    /* Restore the implicit leading ones and left-justify the significands. */
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    /* Keep the high 32 bits of the 64-bit product, jamming the discarded bits. */
    shift64RightJamming((uint64_t)sigA * sigB, 32, &product);
    sigZ = product;
    /* If bit 30 is clear, shift the significand up one place. */
    if ((int32_t)(sigZ << 1) >= 0) {
        sigZ <<= 1;
        --expZ;
    }
    return roundAndPackFloat32(signZ, expZ, sigZ STATUS_VAR);
}
1966 158142c2 bellard
1967 158142c2 bellard
/*----------------------------------------------------------------------------
1968 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1969 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1970 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1971 158142c2 bellard
*----------------------------------------------------------------------------*/
1972 158142c2 bellard
1973 158142c2 bellard
float32 float32_div(float32 a, float32 b STATUS_PARAM)
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint32_t sigA, sigB, quot;
    uint64_t dividend;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    if (expA == 0xFF) {
        /* a is infinity or NaN. */
        if (sigA) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Infinity divided by infinity is an invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }
    if (expB == 0xFF) {
        /* Finite divided by infinity is a signed zero. */
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return packFloat32(signZ, 0, 0);
    }
    if (expB == 0) {
        if (sigB == 0) {
            if ((expA | sigA) == 0) {
                /* Zero divided by zero is an invalid operation. */
                float_raise(float_flag_invalid STATUS_VAR);
                return float32_default_nan;
            }
            /* Non-zero divided by zero: signed infinity plus divbyzero. */
            float_raise(float_flag_divbyzero STATUS_VAR);
            return packFloat32(signZ, 0xFF, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }
    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    expZ = expA - expB + 0x7D;
    /* Restore the implicit leading ones and left-justify the significands. */
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    /* Halve the dividend when needed so the quotient fits in 32 bits. */
    if (sigB <= (sigA + sigA)) {
        sigA >>= 1;
        ++expZ;
    }
    dividend = ((uint64_t)sigA) << 32;
    quot = dividend / sigB;
    /*
     * When the low six quotient bits are all zero, set a sticky bit if the
     * division left a remainder.
     */
    if ((quot & 0x3F) == 0) {
        quot |= ((uint64_t)sigB * quot != dividend);
    }
    return roundAndPackFloat32(signZ, expZ, quot STATUS_VAR);
}
2030 158142c2 bellard
2031 158142c2 bellard
/*----------------------------------------------------------------------------
2032 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
2033 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2034 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2035 158142c2 bellard
*----------------------------------------------------------------------------*/
2036 158142c2 bellard
2037 158142c2 bellard
float32 float32_rem(float32 a, float32 b STATUS_PARAM)
{
    flag signA, signZ;
    int16 expA, expB, expGap;
    uint32_t sigA, sigB;
    uint32_t quot;
    uint64_t sigA64, sigB64, quot64;
    uint32_t prevSigA;
    int32_t sigSum;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);

    if (expA == 0xFF) {
        /* a is NaN or infinity; the remainder of infinity is invalid. */
        if (sigA || (expB == 0xFF && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if (expB == 0xFF) {
        /* Finite a with infinite b: the result is a itself. */
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return a;
    }
    if (expB == 0) {
        if (sigB == 0) {
            /* Remainder with respect to zero is invalid. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }
    if (expA == 0) {
        if (sigA == 0) {
            return a;
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    expGap = expA - expB;
    /* Restore the implicit leading ones. */
    sigA |= 0x00800000;
    sigB |= 0x00800000;

    if (expGap < 32) {
        /* The whole quotient fits in 32 bits. */
        sigA <<= 8;
        sigB <<= 8;
        if (expGap < 0) {
            /* a is returned unchanged once it is this much smaller than b. */
            if (expGap < -1) {
                return a;
            }
            sigA >>= 1;
        }
        quot = (sigB <= sigA);
        if (quot) {
            sigA -= sigB;
        }
        if (0 < expGap) {
            quot = (((uint64_t)sigA) << 32) / sigB;
            quot >>= 32 - expGap;
            sigB >>= 2;
            sigA = ((sigA >> 1) << (expGap - 1)) - sigB * quot;
        } else {
            sigA >>= 2;
            sigB >>= 2;
        }
    } else {
        /* Large exponent gap: reduce the 64-bit dividend 62 bits at a time. */
        if (sigB <= sigA) {
            sigA -= sigB;
        }
        sigA64 = ((uint64_t)sigA) << 40;
        sigB64 = ((uint64_t)sigB) << 40;
        expGap -= 64;
        while (0 < expGap) {
            quot64 = estimateDiv128To64(sigA64, 0, sigB64);
            /* Back the estimate off by 2, clamping at zero. */
            quot64 = (2 < quot64) ? quot64 - 2 : 0;
            sigA64 = -((sigB * quot64) << 38);
            expGap -= 62;
        }
        expGap += 64;
        quot64 = estimateDiv128To64(sigA64, 0, sigB64);
        quot64 = (2 < quot64) ? quot64 - 2 : 0;
        quot = quot64 >> (64 - expGap);
        sigB <<= 6;
        sigA = ((sigA64 >> 33) << (expGap - 1)) - sigB * quot;
    }

    /*
     * Keep subtracting b until the remainder goes negative, remembering the
     * last non-negative value.
     */
    do {
        prevSigA = sigA;
        ++quot;
        sigA -= sigB;
    } while (0 <= (int32_t)sigA);

    /*
     * Choose whichever candidate is smaller in magnitude; on an exact tie,
     * the parity of the quotient decides.
     */
    sigSum = sigA + prevSigA;
    if ((sigSum < 0) || ((sigSum == 0) && (quot & 1))) {
        sigA = prevSigA;
    }

    signZ = ((int32_t)sigA < 0);
    if (signZ) {
        sigA = -sigA;
    }
    return normalizeRoundAndPackFloat32(signA ^ signZ, expB, sigA STATUS_VAR);
}
2131 158142c2 bellard
2132 158142c2 bellard
/*----------------------------------------------------------------------------
2133 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2134 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2135 158142c2 bellard
| Floating-Point Arithmetic.
2136 158142c2 bellard
*----------------------------------------------------------------------------*/
2137 158142c2 bellard
2138 158142c2 bellard
float32 float32_sqrt(float32 a STATUS_PARAM)
{
    flag sign;
    int16 exp, expZ;
    uint32_t sig, root;
    uint64_t remainder, square;

    a = float32_squash_input_denormal(a STATUS_VAR);

    sig = extractFloat32Frac(a);
    exp = extractFloat32Exp(a);
    sign = extractFloat32Sign(a);

    if (exp == 0xFF) {
        /* NaN propagates; +infinity is its own root; -infinity is invalid. */
        if (sig) {
            return propagateFloat32NaN(a, float32_zero STATUS_VAR);
        }
        if (!sign) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if (sign) {
        /* -0 is returned as is; any other negative input is invalid. */
        if ((exp | sig) == 0) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if (exp == 0) {
        if (sig == 0) {
            return float32_zero;
        }
        normalizeFloat32Subnormal(sig, &exp, &sig);
    }

    /* Halve the unbiased exponent and re-bias for the result. */
    expZ = ((exp - 0x7F) >> 1) + 0x7E;
    sig = (sig | 0x00800000) << 8;
    root = estimateSqrt32(exp, sig) + 2;

    /*
     * Correct the estimate exactly only when its low seven bits are small
     * (NOTE(review): presumably the cases where the estimate's error could
     * change the rounded result -- confirm against estimateSqrt32).
     */
    if ((root & 0x7F) <= 5) {
        if (root < 2) {
            /* The "+ 2" wrapped around: use the largest significand. */
            return roundAndPackFloat32(0, expZ, 0x7FFFFFFF STATUS_VAR);
        }
        sig >>= exp & 1;
        square = ((uint64_t)root) * root;
        remainder = (((uint64_t)sig) << 32) - square;
        /* Step the root down until root*root no longer exceeds the operand. */
        while ((int64_t)remainder < 0) {
            --root;
            remainder += (((uint64_t)root) << 1) | 1;
        }
        /* Sticky bit: the root is inexact when a remainder is left. */
        root |= (remainder != 0);
    }
    shift32RightJamming(root, 1, &root);
    return roundAndPackFloat32(0, expZ, root STATUS_VAR);
}
2186 158142c2 bellard
2187 158142c2 bellard
/*----------------------------------------------------------------------------
2188 8229c991 Aurelien Jarno
| Returns the binary exponential of the single-precision floating-point value
2189 8229c991 Aurelien Jarno
| `a'. The operation is performed according to the IEC/IEEE Standard for
2190 8229c991 Aurelien Jarno
| Binary Floating-Point Arithmetic.
2191 8229c991 Aurelien Jarno
|
2192 8229c991 Aurelien Jarno
| Uses the following identities:
2193 8229c991 Aurelien Jarno
|
2194 8229c991 Aurelien Jarno
| 1. -------------------------------------------------------------------------
2195 8229c991 Aurelien Jarno
|      x    x*ln(2)
2196 8229c991 Aurelien Jarno
|     2  = e
2197 8229c991 Aurelien Jarno
|
2198 8229c991 Aurelien Jarno
| 2. -------------------------------------------------------------------------
2199 8229c991 Aurelien Jarno
|                      2     3     4     5           n
2200 8229c991 Aurelien Jarno
|      x        x     x     x     x     x           x
2201 8229c991 Aurelien Jarno
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2202 8229c991 Aurelien Jarno
|               1!    2!    3!    4!    5!          n!
2203 8229c991 Aurelien Jarno
*----------------------------------------------------------------------------*/
2204 8229c991 Aurelien Jarno
2205 8229c991 Aurelien Jarno
static const float64 float32_exp2_coefficients[15] =
2206 8229c991 Aurelien Jarno
{
2207 d5138cf4 Peter Maydell
    const_float64( 0x3ff0000000000000ll ), /*  1 */
2208 d5138cf4 Peter Maydell
    const_float64( 0x3fe0000000000000ll ), /*  2 */
2209 d5138cf4 Peter Maydell
    const_float64( 0x3fc5555555555555ll ), /*  3 */
2210 d5138cf4 Peter Maydell
    const_float64( 0x3fa5555555555555ll ), /*  4 */
2211 d5138cf4 Peter Maydell
    const_float64( 0x3f81111111111111ll ), /*  5 */
2212 d5138cf4 Peter Maydell
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
2213 d5138cf4 Peter Maydell
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
2214 d5138cf4 Peter Maydell
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
2215 d5138cf4 Peter Maydell
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
2216 d5138cf4 Peter Maydell
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2217 d5138cf4 Peter Maydell
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2218 d5138cf4 Peter Maydell
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2219 d5138cf4 Peter Maydell
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
2220 d5138cf4 Peter Maydell
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2221 d5138cf4 Peter Maydell
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2222 8229c991 Aurelien Jarno
};
2223 8229c991 Aurelien Jarno
2224 8229c991 Aurelien Jarno
float32 float32_exp2( float32 a STATUS_PARAM )
2225 8229c991 Aurelien Jarno
{
2226 8229c991 Aurelien Jarno
    flag aSign;
2227 8229c991 Aurelien Jarno
    int16 aExp;
2228 bb98fe42 Andreas Färber
    uint32_t aSig;
2229 8229c991 Aurelien Jarno
    float64 r, x, xn;
2230 8229c991 Aurelien Jarno
    int i;
2231 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2232 8229c991 Aurelien Jarno
2233 8229c991 Aurelien Jarno
    aSig = extractFloat32Frac( a );
2234 8229c991 Aurelien Jarno
    aExp = extractFloat32Exp( a );
2235 8229c991 Aurelien Jarno
    aSign = extractFloat32Sign( a );
2236 8229c991 Aurelien Jarno
2237 8229c991 Aurelien Jarno
    if ( aExp == 0xFF) {
2238 8229c991 Aurelien Jarno
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2239 8229c991 Aurelien Jarno
        return (aSign) ? float32_zero : a;
2240 8229c991 Aurelien Jarno
    }
2241 8229c991 Aurelien Jarno
    if (aExp == 0) {
2242 8229c991 Aurelien Jarno
        if (aSig == 0) return float32_one;
2243 8229c991 Aurelien Jarno
    }
2244 8229c991 Aurelien Jarno
2245 8229c991 Aurelien Jarno
    float_raise( float_flag_inexact STATUS_VAR);
2246 8229c991 Aurelien Jarno
2247 8229c991 Aurelien Jarno
    /* ******************************* */
2248 8229c991 Aurelien Jarno
    /* using float64 for approximation */
2249 8229c991 Aurelien Jarno
    /* ******************************* */
2250 8229c991 Aurelien Jarno
    x = float32_to_float64(a STATUS_VAR);
2251 8229c991 Aurelien Jarno
    x = float64_mul(x, float64_ln2 STATUS_VAR);
2252 8229c991 Aurelien Jarno
2253 8229c991 Aurelien Jarno
    xn = x;
2254 8229c991 Aurelien Jarno
    r = float64_one;
2255 8229c991 Aurelien Jarno
    for (i = 0 ; i < 15 ; i++) {
2256 8229c991 Aurelien Jarno
        float64 f;
2257 8229c991 Aurelien Jarno
2258 8229c991 Aurelien Jarno
        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2259 8229c991 Aurelien Jarno
        r = float64_add(r, f STATUS_VAR);
2260 8229c991 Aurelien Jarno
2261 8229c991 Aurelien Jarno
        xn = float64_mul(xn, x STATUS_VAR);
2262 8229c991 Aurelien Jarno
    }
2263 8229c991 Aurelien Jarno
2264 8229c991 Aurelien Jarno
    return float64_to_float32(r, status);
2265 8229c991 Aurelien Jarno
}
2266 8229c991 Aurelien Jarno
2267 8229c991 Aurelien Jarno
/*----------------------------------------------------------------------------
2268 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2269 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2270 374dfc33 aurel32
| Floating-Point Arithmetic.
2271 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2272 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2273 374dfc33 aurel32
{
2274 374dfc33 aurel32
    flag aSign, zSign;
2275 374dfc33 aurel32
    int16 aExp;
2276 bb98fe42 Andreas Färber
    uint32_t aSig, zSig, i;
2277 374dfc33 aurel32
2278 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2279 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2280 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2281 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2282 374dfc33 aurel32
2283 374dfc33 aurel32
    if ( aExp == 0 ) {
2284 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2285 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2286 374dfc33 aurel32
    }
2287 374dfc33 aurel32
    if ( aSign ) {
2288 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2289 374dfc33 aurel32
        return float32_default_nan;
2290 374dfc33 aurel32
    }
2291 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2292 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2293 374dfc33 aurel32
        return a;
2294 374dfc33 aurel32
    }
2295 374dfc33 aurel32
2296 374dfc33 aurel32
    aExp -= 0x7F;
2297 374dfc33 aurel32
    aSig |= 0x00800000;
2298 374dfc33 aurel32
    zSign = aExp < 0;
2299 374dfc33 aurel32
    zSig = aExp << 23;
2300 374dfc33 aurel32
2301 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2302 bb98fe42 Andreas Färber
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
2303 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2304 374dfc33 aurel32
            aSig >>= 1;
2305 374dfc33 aurel32
            zSig |= i;
2306 374dfc33 aurel32
        }
2307 374dfc33 aurel32
    }
2308 374dfc33 aurel32
2309 374dfc33 aurel32
    if ( zSign )
2310 374dfc33 aurel32
        zSig = -zSig;
2311 374dfc33 aurel32
2312 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2313 374dfc33 aurel32
}
2314 374dfc33 aurel32
2315 374dfc33 aurel32
/*----------------------------------------------------------------------------
2316 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2317 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2318 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2319 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2320 158142c2 bellard
*----------------------------------------------------------------------------*/
2321 158142c2 bellard
2322 b689362d Aurelien Jarno
int float32_eq( float32 a, float32 b STATUS_PARAM )
2323 158142c2 bellard
{
2324 b689362d Aurelien Jarno
    uint32_t av, bv;
2325 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2326 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2327 158142c2 bellard
2328 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2329 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2330 158142c2 bellard
       ) {
2331 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2332 158142c2 bellard
        return 0;
2333 158142c2 bellard
    }
2334 b689362d Aurelien Jarno
    av = float32_val(a);
2335 b689362d Aurelien Jarno
    bv = float32_val(b);
2336 b689362d Aurelien Jarno
    return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2337 158142c2 bellard
}
2338 158142c2 bellard
2339 158142c2 bellard
/*----------------------------------------------------------------------------
2340 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2341 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
2342 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
2343 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2344 158142c2 bellard
*----------------------------------------------------------------------------*/
2345 158142c2 bellard
2346 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2347 158142c2 bellard
{
2348 158142c2 bellard
    flag aSign, bSign;
2349 bb98fe42 Andreas Färber
    uint32_t av, bv;
2350 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2351 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2352 158142c2 bellard
2353 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2354 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2355 158142c2 bellard
       ) {
2356 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2357 158142c2 bellard
        return 0;
2358 158142c2 bellard
    }
2359 158142c2 bellard
    aSign = extractFloat32Sign( a );
2360 158142c2 bellard
    bSign = extractFloat32Sign( b );
2361 f090c9d4 pbrook
    av = float32_val(a);
2362 f090c9d4 pbrook
    bv = float32_val(b);
2363 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2364 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2365 158142c2 bellard
2366 158142c2 bellard
}
2367 158142c2 bellard
2368 158142c2 bellard
/*----------------------------------------------------------------------------
2369 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2370 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2371 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
2372 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2373 158142c2 bellard
*----------------------------------------------------------------------------*/
2374 158142c2 bellard
2375 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2376 158142c2 bellard
{
2377 158142c2 bellard
    flag aSign, bSign;
2378 bb98fe42 Andreas Färber
    uint32_t av, bv;
2379 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2380 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2381 158142c2 bellard
2382 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2383 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2384 158142c2 bellard
       ) {
2385 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2386 158142c2 bellard
        return 0;
2387 158142c2 bellard
    }
2388 158142c2 bellard
    aSign = extractFloat32Sign( a );
2389 158142c2 bellard
    bSign = extractFloat32Sign( b );
2390 f090c9d4 pbrook
    av = float32_val(a);
2391 f090c9d4 pbrook
    bv = float32_val(b);
2392 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2393 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2394 158142c2 bellard
2395 158142c2 bellard
}
2396 158142c2 bellard
2397 158142c2 bellard
/*----------------------------------------------------------------------------
2398 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2399 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
2400 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
2401 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
2402 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2403 67b7861d Aurelien Jarno
2404 67b7861d Aurelien Jarno
int float32_unordered( float32 a, float32 b STATUS_PARAM )
2405 67b7861d Aurelien Jarno
{
2406 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2407 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2408 67b7861d Aurelien Jarno
2409 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2410 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2411 67b7861d Aurelien Jarno
       ) {
2412 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2413 67b7861d Aurelien Jarno
        return 1;
2414 67b7861d Aurelien Jarno
    }
2415 67b7861d Aurelien Jarno
    return 0;
2416 67b7861d Aurelien Jarno
}
2417 b689362d Aurelien Jarno
2418 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2419 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2420 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2421 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
2422 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
2423 158142c2 bellard
*----------------------------------------------------------------------------*/
2424 158142c2 bellard
2425 b689362d Aurelien Jarno
int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
2426 158142c2 bellard
{
2427 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2428 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2429 158142c2 bellard
2430 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2431 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2432 158142c2 bellard
       ) {
2433 b689362d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2434 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2435 b689362d Aurelien Jarno
        }
2436 158142c2 bellard
        return 0;
2437 158142c2 bellard
    }
2438 b689362d Aurelien Jarno
    return ( float32_val(a) == float32_val(b) ) ||
2439 b689362d Aurelien Jarno
            ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2440 158142c2 bellard
}
2441 158142c2 bellard
2442 158142c2 bellard
/*----------------------------------------------------------------------------
2443 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2444 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2445 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2446 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2447 158142c2 bellard
*----------------------------------------------------------------------------*/
2448 158142c2 bellard
2449 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2450 158142c2 bellard
{
2451 158142c2 bellard
    flag aSign, bSign;
2452 bb98fe42 Andreas Färber
    uint32_t av, bv;
2453 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2454 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2455 158142c2 bellard
2456 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2457 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2458 158142c2 bellard
       ) {
2459 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2460 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2461 158142c2 bellard
        }
2462 158142c2 bellard
        return 0;
2463 158142c2 bellard
    }
2464 158142c2 bellard
    aSign = extractFloat32Sign( a );
2465 158142c2 bellard
    bSign = extractFloat32Sign( b );
2466 f090c9d4 pbrook
    av = float32_val(a);
2467 f090c9d4 pbrook
    bv = float32_val(b);
2468 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2469 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2470 158142c2 bellard
2471 158142c2 bellard
}
2472 158142c2 bellard
2473 158142c2 bellard
/*----------------------------------------------------------------------------
2474 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2475 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2476 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2477 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2478 158142c2 bellard
*----------------------------------------------------------------------------*/
2479 158142c2 bellard
2480 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2481 158142c2 bellard
{
2482 158142c2 bellard
    flag aSign, bSign;
2483 bb98fe42 Andreas Färber
    uint32_t av, bv;
2484 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2485 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2486 158142c2 bellard
2487 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2488 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2489 158142c2 bellard
       ) {
2490 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2491 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2492 158142c2 bellard
        }
2493 158142c2 bellard
        return 0;
2494 158142c2 bellard
    }
2495 158142c2 bellard
    aSign = extractFloat32Sign( a );
2496 158142c2 bellard
    bSign = extractFloat32Sign( b );
2497 f090c9d4 pbrook
    av = float32_val(a);
2498 f090c9d4 pbrook
    bv = float32_val(b);
2499 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2500 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2501 158142c2 bellard
2502 158142c2 bellard
}
2503 158142c2 bellard
2504 158142c2 bellard
/*----------------------------------------------------------------------------
2505 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2506 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
2507 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
2508 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
2509 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2510 67b7861d Aurelien Jarno
2511 67b7861d Aurelien Jarno
int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2512 67b7861d Aurelien Jarno
{
2513 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2514 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2515 67b7861d Aurelien Jarno
2516 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2517 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2518 67b7861d Aurelien Jarno
       ) {
2519 67b7861d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2520 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2521 67b7861d Aurelien Jarno
        }
2522 67b7861d Aurelien Jarno
        return 1;
2523 67b7861d Aurelien Jarno
    }
2524 67b7861d Aurelien Jarno
    return 0;
2525 67b7861d Aurelien Jarno
}
2526 67b7861d Aurelien Jarno
2527 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2528 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2529 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2530 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2531 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2532 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2533 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2534 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2535 158142c2 bellard
*----------------------------------------------------------------------------*/
2536 158142c2 bellard
2537 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2538 158142c2 bellard
{
2539 158142c2 bellard
    flag aSign;
2540 158142c2 bellard
    int16 aExp, shiftCount;
2541 bb98fe42 Andreas Färber
    uint64_t aSig;
2542 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2543 158142c2 bellard
2544 158142c2 bellard
    aSig = extractFloat64Frac( a );
2545 158142c2 bellard
    aExp = extractFloat64Exp( a );
2546 158142c2 bellard
    aSign = extractFloat64Sign( a );
2547 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2548 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2549 158142c2 bellard
    shiftCount = 0x42C - aExp;
2550 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2551 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2552 158142c2 bellard
2553 158142c2 bellard
}
2554 158142c2 bellard
2555 158142c2 bellard
/*----------------------------------------------------------------------------
2556 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2557 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2558 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2559 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2560 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2561 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2562 158142c2 bellard
| returned.
2563 158142c2 bellard
*----------------------------------------------------------------------------*/
2564 158142c2 bellard
2565 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2566 158142c2 bellard
{
2567 158142c2 bellard
    flag aSign;
2568 158142c2 bellard
    int16 aExp, shiftCount;
2569 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
2570 158142c2 bellard
    int32 z;
2571 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2572 158142c2 bellard
2573 158142c2 bellard
    aSig = extractFloat64Frac( a );
2574 158142c2 bellard
    aExp = extractFloat64Exp( a );
2575 158142c2 bellard
    aSign = extractFloat64Sign( a );
2576 158142c2 bellard
    if ( 0x41E < aExp ) {
2577 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2578 158142c2 bellard
        goto invalid;
2579 158142c2 bellard
    }
2580 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2581 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2582 158142c2 bellard
        return 0;
2583 158142c2 bellard
    }
2584 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2585 158142c2 bellard
    shiftCount = 0x433 - aExp;
2586 158142c2 bellard
    savedASig = aSig;
2587 158142c2 bellard
    aSig >>= shiftCount;
2588 158142c2 bellard
    z = aSig;
2589 158142c2 bellard
    if ( aSign ) z = - z;
2590 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2591 158142c2 bellard
 invalid:
2592 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2593 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2594 158142c2 bellard
    }
2595 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2596 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2597 158142c2 bellard
    }
2598 158142c2 bellard
    return z;
2599 158142c2 bellard
2600 158142c2 bellard
}
2601 158142c2 bellard
2602 158142c2 bellard
/*----------------------------------------------------------------------------
2603 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2604 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
2605 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2606 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
2607 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2608 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
2609 cbcef455 Peter Maydell
| returned.
2610 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
2611 cbcef455 Peter Maydell
2612 cbcef455 Peter Maydell
int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
2613 cbcef455 Peter Maydell
{
2614 cbcef455 Peter Maydell
    flag aSign;
2615 cbcef455 Peter Maydell
    int16 aExp, shiftCount;
2616 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
2617 cbcef455 Peter Maydell
    int32 z;
2618 cbcef455 Peter Maydell
2619 cbcef455 Peter Maydell
    aSig = extractFloat64Frac( a );
2620 cbcef455 Peter Maydell
    aExp = extractFloat64Exp( a );
2621 cbcef455 Peter Maydell
    aSign = extractFloat64Sign( a );
2622 cbcef455 Peter Maydell
    if ( 0x40E < aExp ) {
2623 cbcef455 Peter Maydell
        if ( ( aExp == 0x7FF ) && aSig ) {
2624 cbcef455 Peter Maydell
            aSign = 0;
2625 cbcef455 Peter Maydell
        }
2626 cbcef455 Peter Maydell
        goto invalid;
2627 cbcef455 Peter Maydell
    }
2628 cbcef455 Peter Maydell
    else if ( aExp < 0x3FF ) {
2629 cbcef455 Peter Maydell
        if ( aExp || aSig ) {
2630 cbcef455 Peter Maydell
            STATUS(float_exception_flags) |= float_flag_inexact;
2631 cbcef455 Peter Maydell
        }
2632 cbcef455 Peter Maydell
        return 0;
2633 cbcef455 Peter Maydell
    }
2634 cbcef455 Peter Maydell
    aSig |= LIT64( 0x0010000000000000 );
2635 cbcef455 Peter Maydell
    shiftCount = 0x433 - aExp;
2636 cbcef455 Peter Maydell
    savedASig = aSig;
2637 cbcef455 Peter Maydell
    aSig >>= shiftCount;
2638 cbcef455 Peter Maydell
    z = aSig;
2639 cbcef455 Peter Maydell
    if ( aSign ) {
2640 cbcef455 Peter Maydell
        z = - z;
2641 cbcef455 Peter Maydell
    }
2642 cbcef455 Peter Maydell
    if ( ( (int16_t)z < 0 ) ^ aSign ) {
2643 cbcef455 Peter Maydell
 invalid:
2644 cbcef455 Peter Maydell
        float_raise( float_flag_invalid STATUS_VAR);
2645 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
2646 cbcef455 Peter Maydell
    }
2647 cbcef455 Peter Maydell
    if ( ( aSig<<shiftCount ) != savedASig ) {
2648 cbcef455 Peter Maydell
        STATUS(float_exception_flags) |= float_flag_inexact;
2649 cbcef455 Peter Maydell
    }
2650 cbcef455 Peter Maydell
    return z;
2651 cbcef455 Peter Maydell
}
2652 cbcef455 Peter Maydell
2653 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
2654 cbcef455 Peter Maydell
| Returns the result of converting the double-precision floating-point value
2655 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2656 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2657 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2658 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2659 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2660 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2661 158142c2 bellard
*----------------------------------------------------------------------------*/
2662 158142c2 bellard
2663 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2664 158142c2 bellard
{
2665 158142c2 bellard
    flag aSign;
2666 158142c2 bellard
    int16 aExp, shiftCount;
2667 bb98fe42 Andreas Färber
    uint64_t aSig, aSigExtra;
2668 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2669 158142c2 bellard
2670 158142c2 bellard
    aSig = extractFloat64Frac( a );
2671 158142c2 bellard
    aExp = extractFloat64Exp( a );
2672 158142c2 bellard
    aSign = extractFloat64Sign( a );
2673 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2674 158142c2 bellard
    shiftCount = 0x433 - aExp;
2675 158142c2 bellard
    if ( shiftCount <= 0 ) {
2676 158142c2 bellard
        if ( 0x43E < aExp ) {
2677 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2678 158142c2 bellard
            if (    ! aSign
2679 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2680 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2681 158142c2 bellard
               ) {
2682 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2683 158142c2 bellard
            }
2684 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
2685 158142c2 bellard
        }
2686 158142c2 bellard
        aSigExtra = 0;
2687 158142c2 bellard
        aSig <<= - shiftCount;
2688 158142c2 bellard
    }
2689 158142c2 bellard
    else {
2690 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2691 158142c2 bellard
    }
2692 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2693 158142c2 bellard
2694 158142c2 bellard
}
2695 158142c2 bellard
2696 158142c2 bellard
/*----------------------------------------------------------------------------
2697 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2698 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2699 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2700 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2701 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2702 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2703 158142c2 bellard
| returned.
2704 158142c2 bellard
*----------------------------------------------------------------------------*/
2705 158142c2 bellard
2706 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2707 158142c2 bellard
{
2708 158142c2 bellard
    flag aSign;
2709 158142c2 bellard
    int16 aExp, shiftCount;
2710 bb98fe42 Andreas Färber
    uint64_t aSig;
2711 158142c2 bellard
    int64 z;
2712 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2713 158142c2 bellard
2714 158142c2 bellard
    aSig = extractFloat64Frac( a );
2715 158142c2 bellard
    aExp = extractFloat64Exp( a );
2716 158142c2 bellard
    aSign = extractFloat64Sign( a );
2717 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2718 158142c2 bellard
    shiftCount = aExp - 0x433;
2719 158142c2 bellard
    if ( 0 <= shiftCount ) {
2720 158142c2 bellard
        if ( 0x43E <= aExp ) {
2721 f090c9d4 pbrook
            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2722 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2723 158142c2 bellard
                if (    ! aSign
2724 158142c2 bellard
                     || (    ( aExp == 0x7FF )
2725 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2726 158142c2 bellard
                   ) {
2727 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2728 158142c2 bellard
                }
2729 158142c2 bellard
            }
2730 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
2731 158142c2 bellard
        }
2732 158142c2 bellard
        z = aSig<<shiftCount;
2733 158142c2 bellard
    }
2734 158142c2 bellard
    else {
2735 158142c2 bellard
        if ( aExp < 0x3FE ) {
2736 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2737 158142c2 bellard
            return 0;
2738 158142c2 bellard
        }
2739 158142c2 bellard
        z = aSig>>( - shiftCount );
2740 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
2741 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
2742 158142c2 bellard
        }
2743 158142c2 bellard
    }
2744 158142c2 bellard
    if ( aSign ) z = - z;
2745 158142c2 bellard
    return z;
2746 158142c2 bellard
2747 158142c2 bellard
}
2748 158142c2 bellard
2749 158142c2 bellard
/*----------------------------------------------------------------------------
2750 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2751 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2752 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2753 158142c2 bellard
| Arithmetic.
2754 158142c2 bellard
*----------------------------------------------------------------------------*/
2755 158142c2 bellard
2756 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
2757 158142c2 bellard
{
2758 158142c2 bellard
    flag aSign;
2759 158142c2 bellard
    int16 aExp;
2760 bb98fe42 Andreas Färber
    uint64_t aSig;
2761 bb98fe42 Andreas Färber
    uint32_t zSig;
2762 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2763 158142c2 bellard
2764 158142c2 bellard
    aSig = extractFloat64Frac( a );
2765 158142c2 bellard
    aExp = extractFloat64Exp( a );
2766 158142c2 bellard
    aSign = extractFloat64Sign( a );
2767 158142c2 bellard
    if ( aExp == 0x7FF ) {
2768 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2769 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
2770 158142c2 bellard
    }
2771 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
2772 158142c2 bellard
    zSig = aSig;
2773 158142c2 bellard
    if ( aExp || zSig ) {
2774 158142c2 bellard
        zSig |= 0x40000000;
2775 158142c2 bellard
        aExp -= 0x381;
2776 158142c2 bellard
    }
2777 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2778 158142c2 bellard
2779 158142c2 bellard
}
2780 158142c2 bellard
2781 60011498 Paul Brook
2782 60011498 Paul Brook
/*----------------------------------------------------------------------------
2783 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2784 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2785 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2786 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2787 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2788 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2789 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2790 60011498 Paul Brook
| significand.
2791 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2792 bb98fe42 Andreas Färber
static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
2793 60011498 Paul Brook
{
2794 bb4d4bb3 Peter Maydell
    return make_float16(
2795 bb98fe42 Andreas Färber
        (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
2796 60011498 Paul Brook
}
2797 60011498 Paul Brook
2798 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2799 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2800 bb4d4bb3 Peter Maydell
2801 bb4d4bb3 Peter Maydell
float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
2802 60011498 Paul Brook
{
2803 60011498 Paul Brook
    flag aSign;
2804 60011498 Paul Brook
    int16 aExp;
2805 bb98fe42 Andreas Färber
    uint32_t aSig;
2806 60011498 Paul Brook
2807 bb4d4bb3 Peter Maydell
    aSign = extractFloat16Sign(a);
2808 bb4d4bb3 Peter Maydell
    aExp = extractFloat16Exp(a);
2809 bb4d4bb3 Peter Maydell
    aSig = extractFloat16Frac(a);
2810 60011498 Paul Brook
2811 60011498 Paul Brook
    if (aExp == 0x1f && ieee) {
2812 60011498 Paul Brook
        if (aSig) {
2813 f591e1be Peter Maydell
            return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
2814 60011498 Paul Brook
        }
2815 60011498 Paul Brook
        return packFloat32(aSign, 0xff, aSig << 13);
2816 60011498 Paul Brook
    }
2817 60011498 Paul Brook
    if (aExp == 0) {
2818 60011498 Paul Brook
        int8 shiftCount;
2819 60011498 Paul Brook
2820 60011498 Paul Brook
        if (aSig == 0) {
2821 60011498 Paul Brook
            return packFloat32(aSign, 0, 0);
2822 60011498 Paul Brook
        }
2823 60011498 Paul Brook
2824 60011498 Paul Brook
        shiftCount = countLeadingZeros32( aSig ) - 21;
2825 60011498 Paul Brook
        aSig = aSig << shiftCount;
2826 60011498 Paul Brook
        aExp = -shiftCount;
2827 60011498 Paul Brook
    }
2828 60011498 Paul Brook
    return packFloat32( aSign, aExp + 0x70, aSig << 13);
2829 60011498 Paul Brook
}
2830 60011498 Paul Brook
2831 bb4d4bb3 Peter Maydell
/* Narrows the single-precision value `a' to half precision.  `ieee' selects
 * the standard IEEE encoding; when it is clear the "ARM" alternative
 * encoding is produced, which has no NaN or infinity.
 *
 * Special inputs:
 *   - NaN:      IEEE  -> converted through the common NaN representation;
 *               !IEEE -> signed zero, invalid raised.
 *   - Infinity: IEEE  -> infinity;
 *               !IEEE -> exponent 0x1f with all fraction bits set, invalid
 *                        raised.
 *   - Zero:     signed zero.
 *
 * Exception flags for finite inputs:
 *   - overflow:  IEEE  -> infinity with overflow + inexact;
 *                !IEEE -> exponent 0x1f / fraction 0x3ff with invalid only.
 *   - inexact:   raised whenever significand bits are discarded.
 *   - underflow: raised only when the result is both tiny and inexact.
 */
float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    uint32_t aSig;
    uint32_t mask;
    uint32_t increment;
    int8 roundingMode;
    /* Largest unbiased exponent that still fits the destination format. */
    int maxExp = ieee ? 15 : 16;
    flag roundingBumpsExp;
    flag isTiny = 0;

    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* Input is a NaN.  The alternative format cannot encode it, so
             * every NaN (quiet or signaling) is an invalid operation there.
             */
            if (!ieee) {
                float_raise(float_flag_invalid STATUS_VAR);
                return packFloat16(aSign, 0, 0);
            }
            return commonNaNToFloat16(
                float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Infinity */
        if (!ieee) {
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
        return packFloat16(aSign, 0x1f, 0);
    }
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* Decimal point between bits 22 and 23.  The leading one is also set
     * for single-precision denormals; those have aExp == -127 after the
     * rebias below and leave through the "return zero" path at the end.
     */
    aSig |= 0x00800000;
    aExp -= 0x7f;

    /* Mask of the significand bits that cannot be kept in the result. */
    if (aExp < -14) {
        /* Result will be a half-precision denormal (or zero). */
        mask = 0x00ffffff;
        if (aExp >= -24) {
            mask >>= 25 + aExp;
        }
    } else {
        /* Result will be a half-precision normal number. */
        mask = 0x00001fff;
    }

    roundingMode = STATUS(float_rounding_mode);
    switch (roundingMode) {
    case float_round_nearest_even:
        increment = (mask + 1) >> 1;
        if ((aSig & mask) == increment) {
            /* Exact tie: round up only if that makes the result even. */
            increment = aSig & (increment << 1);
        }
        break;
    case float_round_up:
        increment = aSign ? 0 : mask;
        break;
    case float_round_down:
        increment = aSign ? mask : 0;
        break;
    default: /* round_to_zero */
        increment = 0;
        break;
    }

    roundingBumpsExp = (aSig + increment >= 0x01000000);

    /* Overflow is decided first so that it never picks up underflow. */
    if (aExp > maxExp || (aExp == maxExp && roundingBumpsExp)) {
        if (ieee) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        } else {
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }

    if (aExp < -14) {
        /* NOTE(review): for tininess-after-rounding this uses the denormal
         * rounding increment to decide whether the value rounds up to the
         * smallest normal number, which is an approximation - confirm if a
         * target with after-rounding detection starts using half precision.
         */
        isTiny =
            (STATUS(float_detect_tininess) == float_tininess_before_rounding)
            || (aExp < -15)
            || (!roundingBumpsExp);
    }
    if (aSig & mask) {
        float_raise( float_flag_inexact STATUS_VAR );
        if (isTiny) {
            float_raise( float_flag_underflow STATUS_VAR );
        }
    }

    aSig += increment;
    if (roundingBumpsExp) {
        aSig >>= 1;
        aExp++;
    }

    if (aExp < -24) {
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    /* aSig still holds its leading one in bit 23; after the shift it lands
     * in bit 10 and packFloat16() adds it into the exponent field.
     */
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
2924 60011498 Paul Brook
2925 158142c2 bellard
#ifdef FLOATX80

/*----------------------------------------------------------------------------
| Widens the double-precision floating-point value `a' to the extended
| double-precision format.  NaNs go through the common NaN representation,
| infinities and zeros keep their sign, and subnormal inputs are normalized
| before being packed.  The conversion follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac;

    a = float64_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat64Frac(a);
    exp = extractFloat64Exp(a);
    sign = extractFloat64Sign(a);

    if (exp == 0x7FF) {
        if (frac == 0) {
            return packFloatx80(sign, 0x7FFF, LIT64(0x8000000000000000));
        }
        return commonNaNToFloatx80(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    if (exp == 0) {
        if (frac == 0) {
            return packFloatx80(sign, 0, 0);
        }
        normalizeFloat64Subnormal(frac, &exp, &frac);
    }

    /* Set the leading one explicitly and left-align the 53-bit significand
     * in the 64-bit field; 0x3C00 is the difference between the biases.
     */
    frac = (frac | LIT64(0x0010000000000000)) << 11;
    return packFloatx80(sign, exp + 0x3C00, frac);
}

#endif
2959 158142c2 bellard
2960 158142c2 bellard
#ifdef FLOAT128

/*----------------------------------------------------------------------------
| Widens the double-precision floating-point value `a' to the
| quadruple-precision format.  NaNs go through the common NaN
| representation, infinities and zeros keep their sign, and subnormal inputs
| are normalized before being packed.  The conversion follows the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 float64_to_float128( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac, hi, lo;

    a = float64_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat64Frac(a);
    exp = extractFloat64Exp(a);
    sign = extractFloat64Sign(a);

    if (exp == 0x7FF) {
        if (frac == 0) {
            return packFloat128(sign, 0x7FFF, 0, 0);
        }
        return commonNaNToFloat128(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    if (exp == 0) {
        if (frac == 0) {
            return packFloat128(sign, 0, 0, 0);
        }
        normalizeFloat64Subnormal(frac, &exp, &frac);
        /* The normalized significand keeps its leading one, which
         * packFloat128() will add into the exponent; compensate for it.
         */
        --exp;
    }

    /* Spread the fraction across the two 64-bit result words. */
    shift128Right(frac, 0, 4, &hi, &lo);
    return packFloat128(sign, exp + 0x3C00, hi, lo);
}

#endif
2994 158142c2 bellard
2995 158142c2 bellard
/*----------------------------------------------------------------------------
2996 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2997 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2998 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2999 158142c2 bellard
| Floating-Point Arithmetic.
3000 158142c2 bellard
*----------------------------------------------------------------------------*/
3001 158142c2 bellard
3002 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
3003 158142c2 bellard
{
3004 158142c2 bellard
    flag aSign;
3005 158142c2 bellard
    int16 aExp;
3006 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
3007 158142c2 bellard
    int8 roundingMode;
3008 bb98fe42 Andreas Färber
    uint64_t z;
3009 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3010 158142c2 bellard
3011 158142c2 bellard
    aExp = extractFloat64Exp( a );
3012 158142c2 bellard
    if ( 0x433 <= aExp ) {
3013 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3014 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
3015 158142c2 bellard
        }
3016 158142c2 bellard
        return a;
3017 158142c2 bellard
    }
3018 158142c2 bellard
    if ( aExp < 0x3FF ) {
3019 bb98fe42 Andreas Färber
        if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3020 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3021 158142c2 bellard
        aSign = extractFloat64Sign( a );
3022 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3023 158142c2 bellard
         case float_round_nearest_even:
3024 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3025 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
3026 158142c2 bellard
            }
3027 158142c2 bellard
            break;
3028 158142c2 bellard
         case float_round_down:
3029 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3030 158142c2 bellard
         case float_round_up:
3031 f090c9d4 pbrook
            return make_float64(
3032 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3033 158142c2 bellard
        }
3034 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
3035 158142c2 bellard
    }
3036 158142c2 bellard
    lastBitMask = 1;
3037 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
3038 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3039 f090c9d4 pbrook
    z = float64_val(a);
3040 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3041 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3042 158142c2 bellard
        z += lastBitMask>>1;
3043 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3044 158142c2 bellard
    }
3045 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3046 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
3047 158142c2 bellard
            z += roundBitsMask;
3048 158142c2 bellard
        }
3049 158142c2 bellard
    }
3050 158142c2 bellard
    z &= ~ roundBitsMask;
3051 f090c9d4 pbrook
    if ( z != float64_val(a) )
3052 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
3053 f090c9d4 pbrook
    return make_float64(z);
3054 158142c2 bellard
3055 158142c2 bellard
}
3056 158142c2 bellard
3057 e6e5906b pbrook
/* Rounds `a' to an integral double-precision value, truncating toward zero.
 * Implemented by temporarily switching the rounding mode to
 * float_round_to_zero around a call to float64_round_to_int(); the previous
 * mode is restored before returning.
 */
float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode = STATUS(float_rounding_mode);
    float64 truncated;

    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;

    return truncated;
}
3067 e6e5906b pbrook
3068 158142c2 bellard
/*----------------------------------------------------------------------------
3069 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
3070 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
3071 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
3072 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3073 158142c2 bellard
| Floating-Point Arithmetic.
3074 158142c2 bellard
*----------------------------------------------------------------------------*/
3075 158142c2 bellard
3076 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3077 158142c2 bellard
{
3078 158142c2 bellard
    int16 aExp, bExp, zExp;
3079 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3080 158142c2 bellard
    int16 expDiff;
3081 158142c2 bellard
3082 158142c2 bellard
    aSig = extractFloat64Frac( a );
3083 158142c2 bellard
    aExp = extractFloat64Exp( a );
3084 158142c2 bellard
    bSig = extractFloat64Frac( b );
3085 158142c2 bellard
    bExp = extractFloat64Exp( b );
3086 158142c2 bellard
    expDiff = aExp - bExp;
3087 158142c2 bellard
    aSig <<= 9;
3088 158142c2 bellard
    bSig <<= 9;
3089 158142c2 bellard
    if ( 0 < expDiff ) {
3090 158142c2 bellard
        if ( aExp == 0x7FF ) {
3091 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3092 158142c2 bellard
            return a;
3093 158142c2 bellard
        }
3094 158142c2 bellard
        if ( bExp == 0 ) {
3095 158142c2 bellard
            --expDiff;
3096 158142c2 bellard
        }
3097 158142c2 bellard
        else {
3098 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
3099 158142c2 bellard
        }
3100 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
3101 158142c2 bellard
        zExp = aExp;
3102 158142c2 bellard
    }
3103 158142c2 bellard
    else if ( expDiff < 0 ) {
3104 158142c2 bellard
        if ( bExp == 0x7FF ) {
3105 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3106 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3107 158142c2 bellard
        }
3108 158142c2 bellard
        if ( aExp == 0 ) {
3109 158142c2 bellard
            ++expDiff;
3110 158142c2 bellard
        }
3111 158142c2 bellard
        else {
3112 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
3113 158142c2 bellard
        }
3114 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
3115 158142c2 bellard
        zExp = bExp;
3116 158142c2 bellard
    }
3117 158142c2 bellard
    else {
3118 158142c2 bellard
        if ( aExp == 0x7FF ) {
3119 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3120 158142c2 bellard
            return a;
3121 158142c2 bellard
        }
3122 fe76d976 pbrook
        if ( aExp == 0 ) {
3123 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
3124 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3125 fe76d976 pbrook
        }
3126 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3127 158142c2 bellard
        zExp = aExp;
3128 158142c2 bellard
        goto roundAndPack;
3129 158142c2 bellard
    }
3130 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
3131 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
3132 158142c2 bellard
    --zExp;
3133 bb98fe42 Andreas Färber
    if ( (int64_t) zSig < 0 ) {
3134 158142c2 bellard
        zSig = aSig + bSig;
3135 158142c2 bellard
        ++zExp;
3136 158142c2 bellard
    }
3137 158142c2 bellard
 roundAndPack:
3138 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3139 158142c2 bellard
3140 158142c2 bellard
}
3141 158142c2 bellard
3142 158142c2 bellard
/*----------------------------------------------------------------------------
3143 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
3144 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
3145 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3146 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3147 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3148 158142c2 bellard
*----------------------------------------------------------------------------*/
3149 158142c2 bellard
3150 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3151 158142c2 bellard
{
3152 158142c2 bellard
    int16 aExp, bExp, zExp;
3153 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3154 158142c2 bellard
    int16 expDiff;
3155 158142c2 bellard
3156 158142c2 bellard
    aSig = extractFloat64Frac( a );
3157 158142c2 bellard
    aExp = extractFloat64Exp( a );
3158 158142c2 bellard
    bSig = extractFloat64Frac( b );
3159 158142c2 bellard
    bExp = extractFloat64Exp( b );
3160 158142c2 bellard
    expDiff = aExp - bExp;
3161 158142c2 bellard
    aSig <<= 10;
3162 158142c2 bellard
    bSig <<= 10;
3163 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3164 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3165 158142c2 bellard
    if ( aExp == 0x7FF ) {
3166 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3167 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3168 158142c2 bellard
        return float64_default_nan;
3169 158142c2 bellard
    }
3170 158142c2 bellard
    if ( aExp == 0 ) {
3171 158142c2 bellard
        aExp = 1;
3172 158142c2 bellard
        bExp = 1;
3173 158142c2 bellard
    }
3174 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3175 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3176 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3177 158142c2 bellard
 bExpBigger:
3178 158142c2 bellard
    if ( bExp == 0x7FF ) {
3179 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3180 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
3181 158142c2 bellard
    }
3182 158142c2 bellard
    if ( aExp == 0 ) {
3183 158142c2 bellard
        ++expDiff;
3184 158142c2 bellard
    }
3185 158142c2 bellard
    else {
3186 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
3187 158142c2 bellard
    }
3188 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
3189 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
3190 158142c2 bellard
 bBigger:
3191 158142c2 bellard
    zSig = bSig - aSig;
3192 158142c2 bellard
    zExp = bExp;
3193 158142c2 bellard
    zSign ^= 1;
3194 158142c2 bellard
    goto normalizeRoundAndPack;
3195 158142c2 bellard
 aExpBigger:
3196 158142c2 bellard
    if ( aExp == 0x7FF ) {
3197 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3198 158142c2 bellard
        return a;
3199 158142c2 bellard
    }
3200 158142c2 bellard
    if ( bExp == 0 ) {
3201 158142c2 bellard
        --expDiff;
3202 158142c2 bellard
    }
3203 158142c2 bellard
    else {
3204 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
3205 158142c2 bellard
    }
3206 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
3207 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
3208 158142c2 bellard
 aBigger:
3209 158142c2 bellard
    zSig = aSig - bSig;
3210 158142c2 bellard
    zExp = aExp;
3211 158142c2 bellard
 normalizeRoundAndPack:
3212 158142c2 bellard
    --zExp;
3213 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3214 158142c2 bellard
3215 158142c2 bellard
}
3216 158142c2 bellard
3217 158142c2 bellard
/*----------------------------------------------------------------------------
3218 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
3219 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
3220 158142c2 bellard
| Binary Floating-Point Arithmetic.
3221 158142c2 bellard
*----------------------------------------------------------------------------*/
3222 158142c2 bellard
3223 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
3224 158142c2 bellard
{
3225 158142c2 bellard
    flag aSign, bSign;
3226 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3227 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3228 158142c2 bellard
3229 158142c2 bellard
    aSign = extractFloat64Sign( a );
3230 158142c2 bellard
    bSign = extractFloat64Sign( b );
3231 158142c2 bellard
    if ( aSign == bSign ) {
3232 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3233 158142c2 bellard
    }
3234 158142c2 bellard
    else {
3235 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3236 158142c2 bellard
    }
3237 158142c2 bellard
3238 158142c2 bellard
}
3239 158142c2 bellard
3240 158142c2 bellard
/*----------------------------------------------------------------------------
3241 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
3242 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3243 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3244 158142c2 bellard
*----------------------------------------------------------------------------*/
3245 158142c2 bellard
3246 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3247 158142c2 bellard
{
3248 158142c2 bellard
    flag aSign, bSign;
3249 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3250 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3251 158142c2 bellard
3252 158142c2 bellard
    aSign = extractFloat64Sign( a );
3253 158142c2 bellard
    bSign = extractFloat64Sign( b );
3254 158142c2 bellard
    if ( aSign == bSign ) {
3255 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3256 158142c2 bellard
    }
3257 158142c2 bellard
    else {
3258 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3259 158142c2 bellard
    }
3260 158142c2 bellard
3261 158142c2 bellard
}
3262 158142c2 bellard
3263 158142c2 bellard
/*----------------------------------------------------------------------------
3264 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
3265 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3266 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3267 158142c2 bellard
*----------------------------------------------------------------------------*/
3268 158142c2 bellard
3269 158142c2 bellard
float64 float64_mul(float64 a, float64 b STATUS_PARAM)
{
    flag a_sign, b_sign, z_sign;
    int16 a_exp, b_exp, z_exp;
    uint64_t a_sig, b_sig, prod_hi, prod_lo;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* Split both operands into fraction, biased exponent and sign. */
    a_sig = extractFloat64Frac(a);
    a_exp = extractFloat64Exp(a);
    a_sign = extractFloat64Sign(a);
    b_sig = extractFloat64Frac(b);
    b_exp = extractFloat64Exp(b);
    b_sign = extractFloat64Sign(b);
    z_sign = a_sign ^ b_sign;

    /* `a' is a NaN or an infinity. */
    if (a_exp == 0x7FF) {
        if (a_sig || ((b_exp == 0x7FF) && b_sig)) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        if (b_exp == 0 && b_sig == 0) {
            /* infinity * zero */
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return packFloat64(z_sign, 0x7FF, 0);
    }

    /* `b' is a NaN or an infinity (and `a' is finite). */
    if (b_exp == 0x7FF) {
        if (b_sig) {
            return propagateFloat64NaN(a, b STATUS_VAR);
        }
        if (a_exp == 0 && a_sig == 0) {
            /* zero * infinity */
            float_raise(float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return packFloat64(z_sign, 0x7FF, 0);
    }

    /* Zero operands give a signed zero; subnormals are normalized. */
    if (a_exp == 0) {
        if (a_sig == 0) {
            return packFloat64(z_sign, 0, 0);
        }
        normalizeFloat64Subnormal(a_sig, &a_exp, &a_sig);
    }
    if (b_exp == 0) {
        if (b_sig == 0) {
            return packFloat64(z_sign, 0, 0);
        }
        normalizeFloat64Subnormal(b_sig, &b_exp, &b_sig);
    }

    z_exp = a_exp + b_exp - 0x3FF;

    /*
     * Restore the implicit leading bit and left-align the significands
     * (bit 62 for `a', bit 63 for `b') before the 64x64->128 multiply.
     */
    a_sig = (a_sig | LIT64(0x0010000000000000)) << 10;
    b_sig = (b_sig | LIT64(0x0010000000000000)) << 11;
    mul64To128(a_sig, b_sig, &prod_hi, &prod_lo);

    /* Any non-zero low half is folded into the sticky bit. */
    prod_hi |= (prod_lo != 0);

    /* If bit 62 of the high half is clear, shift up by one position. */
    if ((int64_t)(prod_hi << 1) >= 0) {
        prod_hi <<= 1;
        --z_exp;
    }

    return roundAndPackFloat64(z_sign, z_exp, prod_hi STATUS_VAR);
}
3323 158142c2 bellard
3324 158142c2 bellard
/*----------------------------------------------------------------------------
3325 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
3326 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
3327 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3328 158142c2 bellard
*----------------------------------------------------------------------------*/
3329 158142c2 bellard
3330 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
3331 158142c2 bellard
{
3332 158142c2 bellard
    flag aSign, bSign, zSign;
3333 158142c2 bellard
    int16 aExp, bExp, zExp;
3334 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3335 bb98fe42 Andreas Färber
    uint64_t rem0, rem1;
3336 bb98fe42 Andreas Färber
    uint64_t term0, term1;
3337 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3338 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3339 158142c2 bellard
3340 158142c2 bellard
    aSig = extractFloat64Frac( a );
3341 158142c2 bellard
    aExp = extractFloat64Exp( a );
3342 158142c2 bellard
    aSign = extractFloat64Sign( a );
3343 158142c2 bellard
    bSig = extractFloat64Frac( b );
3344 158142c2 bellard
    bExp = extractFloat64Exp( b );
3345 158142c2 bellard
    bSign = extractFloat64Sign( b );
3346 158142c2 bellard
    zSign = aSign ^ bSign;
3347 158142c2 bellard
    if ( aExp == 0x7FF ) {
3348 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3349 158142c2 bellard
        if ( bExp == 0x7FF ) {
3350 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3351 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3352 158142c2 bellard
            return float64_default_nan;
3353 158142c2 bellard
        }
3354 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3355 158142c2 bellard
    }
3356 158142c2 bellard
    if ( bExp == 0x7FF ) {
3357 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3358 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
3359 158142c2 bellard
    }
3360 158142c2 bellard
    if ( bExp == 0 ) {
3361 158142c2 bellard
        if ( bSig == 0 ) {
3362 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3363 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3364 158142c2 bellard
                return float64_default_nan;
3365 158142c2 bellard
            }
3366 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3367 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3368 158142c2 bellard
        }
3369 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3370 158142c2 bellard
    }
3371 158142c2 bellard
    if ( aExp == 0 ) {
3372 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3373 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3374 158142c2 bellard
    }
3375 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
3376 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3377 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3378 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
3379 158142c2 bellard
        aSig >>= 1;
3380 158142c2 bellard
        ++zExp;
3381 158142c2 bellard
    }
3382 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
3383 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
3384 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
3385 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3386 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3387 158142c2 bellard
            --zSig;
3388 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3389 158142c2 bellard
        }
3390 158142c2 bellard
        zSig |= ( rem1 != 0 );
3391 158142c2 bellard
    }
3392 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3393 158142c2 bellard
3394 158142c2 bellard
}
3395 158142c2 bellard
3396 158142c2 bellard
/*----------------------------------------------------------------------------
3397 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3398 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3399 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3400 158142c2 bellard
*----------------------------------------------------------------------------*/
3401 158142c2 bellard
3402 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3403 158142c2 bellard
{
3404 ed086f3d Blue Swirl
    flag aSign, zSign;
3405 158142c2 bellard
    int16 aExp, bExp, expDiff;
3406 bb98fe42 Andreas Färber
    uint64_t aSig, bSig;
3407 bb98fe42 Andreas Färber
    uint64_t q, alternateASig;
3408 bb98fe42 Andreas Färber
    int64_t sigMean;
3409 158142c2 bellard
3410 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3411 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3412 158142c2 bellard
    aSig = extractFloat64Frac( a );
3413 158142c2 bellard
    aExp = extractFloat64Exp( a );
3414 158142c2 bellard
    aSign = extractFloat64Sign( a );
3415 158142c2 bellard
    bSig = extractFloat64Frac( b );
3416 158142c2 bellard
    bExp = extractFloat64Exp( b );
3417 158142c2 bellard
    if ( aExp == 0x7FF ) {
3418 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3419 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3420 158142c2 bellard
        }
3421 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3422 158142c2 bellard
        return float64_default_nan;
3423 158142c2 bellard
    }
3424 158142c2 bellard
    if ( bExp == 0x7FF ) {
3425 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3426 158142c2 bellard
        return a;
3427 158142c2 bellard
    }
3428 158142c2 bellard
    if ( bExp == 0 ) {
3429 158142c2 bellard
        if ( bSig == 0 ) {
3430 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3431 158142c2 bellard
            return float64_default_nan;
3432 158142c2 bellard
        }
3433 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3434 158142c2 bellard
    }
3435 158142c2 bellard
    if ( aExp == 0 ) {
3436 158142c2 bellard
        if ( aSig == 0 ) return a;
3437 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3438 158142c2 bellard
    }
3439 158142c2 bellard
    expDiff = aExp - bExp;
3440 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3441 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3442 158142c2 bellard
    if ( expDiff < 0 ) {
3443 158142c2 bellard
        if ( expDiff < -1 ) return a;
3444 158142c2 bellard
        aSig >>= 1;
3445 158142c2 bellard
    }
3446 158142c2 bellard
    q = ( bSig <= aSig );
3447 158142c2 bellard
    if ( q ) aSig -= bSig;
3448 158142c2 bellard
    expDiff -= 64;
3449 158142c2 bellard
    while ( 0 < expDiff ) {
3450 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3451 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3452 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
3453 158142c2 bellard
        expDiff -= 62;
3454 158142c2 bellard
    }
3455 158142c2 bellard
    expDiff += 64;
3456 158142c2 bellard
    if ( 0 < expDiff ) {
3457 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3458 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3459 158142c2 bellard
        q >>= 64 - expDiff;
3460 158142c2 bellard
        bSig >>= 2;
3461 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3462 158142c2 bellard
    }
3463 158142c2 bellard
    else {
3464 158142c2 bellard
        aSig >>= 2;
3465 158142c2 bellard
        bSig >>= 2;
3466 158142c2 bellard
    }
3467 158142c2 bellard
    do {
3468 158142c2 bellard
        alternateASig = aSig;
3469 158142c2 bellard
        ++q;
3470 158142c2 bellard
        aSig -= bSig;
3471 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig );
3472 158142c2 bellard
    sigMean = aSig + alternateASig;
3473 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3474 158142c2 bellard
        aSig = alternateASig;
3475 158142c2 bellard
    }
3476 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig < 0 );
3477 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3478 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3479 158142c2 bellard
3480 158142c2 bellard
}
3481 158142c2 bellard
3482 158142c2 bellard
/*----------------------------------------------------------------------------
3483 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3484 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3485 158142c2 bellard
| Floating-Point Arithmetic.
3486 158142c2 bellard
*----------------------------------------------------------------------------*/
3487 158142c2 bellard
3488 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3489 158142c2 bellard
{
3490 158142c2 bellard
    flag aSign;
3491 158142c2 bellard
    int16 aExp, zExp;
3492 bb98fe42 Andreas Färber
    uint64_t aSig, zSig, doubleZSig;
3493 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, term0, term1;
3494 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3495 158142c2 bellard
3496 158142c2 bellard
    aSig = extractFloat64Frac( a );
3497 158142c2 bellard
    aExp = extractFloat64Exp( a );
3498 158142c2 bellard
    aSign = extractFloat64Sign( a );
3499 158142c2 bellard
    if ( aExp == 0x7FF ) {
3500 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3501 158142c2 bellard
        if ( ! aSign ) return a;
3502 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3503 158142c2 bellard
        return float64_default_nan;
3504 158142c2 bellard
    }
3505 158142c2 bellard
    if ( aSign ) {
3506 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3507 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3508 158142c2 bellard
        return float64_default_nan;
3509 158142c2 bellard
    }
3510 158142c2 bellard
    if ( aExp == 0 ) {
3511 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3512 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3513 158142c2 bellard
    }
3514 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3515 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
3516 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
3517 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
3518 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3519 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3520 158142c2 bellard
        doubleZSig = zSig<<1;
3521 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3522 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3523 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3524 158142c2 bellard
            --zSig;
3525 158142c2 bellard
            doubleZSig -= 2;
3526 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3527 158142c2 bellard
        }
3528 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3529 158142c2 bellard
    }
3530 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3531 158142c2 bellard
3532 158142c2 bellard
}
3533 158142c2 bellard
3534 158142c2 bellard
/*----------------------------------------------------------------------------
3535 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3536 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3537 374dfc33 aurel32
| Floating-Point Arithmetic.
3538 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3539 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
3540 374dfc33 aurel32
{
3541 374dfc33 aurel32
    flag aSign, zSign;
3542 374dfc33 aurel32
    int16 aExp;
3543 bb98fe42 Andreas Färber
    uint64_t aSig, aSig0, aSig1, zSig, i;
3544 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3545 374dfc33 aurel32
3546 374dfc33 aurel32
    aSig = extractFloat64Frac( a );
3547 374dfc33 aurel32
    aExp = extractFloat64Exp( a );
3548 374dfc33 aurel32
    aSign = extractFloat64Sign( a );
3549 374dfc33 aurel32
3550 374dfc33 aurel32
    if ( aExp == 0 ) {
3551 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3552 374dfc33 aurel32
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3553 374dfc33 aurel32
    }
3554 374dfc33 aurel32
    if ( aSign ) {
3555 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
3556 374dfc33 aurel32
        return float64_default_nan;
3557 374dfc33 aurel32
    }
3558 374dfc33 aurel32
    if ( aExp == 0x7FF ) {
3559 374dfc33 aurel32
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3560 374dfc33 aurel32
        return a;
3561 374dfc33 aurel32
    }
3562 374dfc33 aurel32
3563 374dfc33 aurel32
    aExp -= 0x3FF;
3564 374dfc33 aurel32
    aSig |= LIT64( 0x0010000000000000 );
3565 374dfc33 aurel32
    zSign = aExp < 0;
3566 bb98fe42 Andreas Färber
    zSig = (uint64_t)aExp << 52;
3567 374dfc33 aurel32
    for (i = 1LL << 51; i > 0; i >>= 1) {
3568 374dfc33 aurel32
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
3569 374dfc33 aurel32
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3570 374dfc33 aurel32
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
3571 374dfc33 aurel32
            aSig >>= 1;
3572 374dfc33 aurel32
            zSig |= i;
3573 374dfc33 aurel32
        }
3574 374dfc33 aurel32
    }
3575 374dfc33 aurel32
3576 374dfc33 aurel32
    if ( zSign )
3577 374dfc33 aurel32
        zSig = -zSig;
3578 374dfc33 aurel32
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3579 374dfc33 aurel32
}
3580 374dfc33 aurel32
3581 374dfc33 aurel32
/*----------------------------------------------------------------------------
3582 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3583 b689362d Aurelien Jarno
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3584 b689362d Aurelien Jarno
| if either operand is a NaN.  Otherwise, the comparison is performed
3585 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3586 158142c2 bellard
*----------------------------------------------------------------------------*/
3587 158142c2 bellard
3588 b689362d Aurelien Jarno
int float64_eq( float64 a, float64 b STATUS_PARAM )
3589 158142c2 bellard
{
3590 bb98fe42 Andreas Färber
    uint64_t av, bv;
3591 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3592 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3593 158142c2 bellard
3594 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3595 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3596 158142c2 bellard
       ) {
3597 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
3598 158142c2 bellard
        return 0;
3599 158142c2 bellard
    }
3600 f090c9d4 pbrook
    av = float64_val(a);
3601 a1b91bb4 pbrook
    bv = float64_val(b);
3602 bb98fe42 Andreas Färber
    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3603 158142c2 bellard
3604 158142c2 bellard
}
3605 158142c2 bellard
3606 158142c2 bellard
/*----------------------------------------------------------------------------
3607 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3608 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  The invalid
3609 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
3610 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3611 158142c2 bellard
*----------------------------------------------------------------------------*/
3612 158142c2 bellard
3613 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
3614 158142c2 bellard
{
3615 158142c2 bellard
    flag aSign, bSign;
3616 bb98fe42 Andreas Färber
    uint64_t av, bv;
3617 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3618 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3619 158142c2 bellard
3620 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3621 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3622 158142c2 bellard
       ) {
3623 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3624 158142c2 bellard
        return 0;
3625 158142c2 bellard
    }
3626 158142c2 bellard
    aSign = extractFloat64Sign( a );
3627 158142c2 bellard
    bSign = extractFloat64Sign( b );
3628 f090c9d4 pbrook
    av = float64_val(a);
3629 a1b91bb4 pbrook
    bv = float64_val(b);
3630 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3631 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3632 158142c2 bellard
3633 158142c2 bellard
}
3634 158142c2 bellard
3635 158142c2 bellard
/*----------------------------------------------------------------------------
3636 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3637 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
3638 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
3639 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3640 158142c2 bellard
*----------------------------------------------------------------------------*/
3641 158142c2 bellard
3642 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
3643 158142c2 bellard
{
3644 158142c2 bellard
    flag aSign, bSign;
3645 bb98fe42 Andreas Färber
    uint64_t av, bv;
3646 158142c2 bellard
3647 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3648 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3649 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3650 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3651 158142c2 bellard
       ) {
3652 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3653 158142c2 bellard
        return 0;
3654 158142c2 bellard
    }
3655 158142c2 bellard
    aSign = extractFloat64Sign( a );
3656 158142c2 bellard
    bSign = extractFloat64Sign( b );
3657 f090c9d4 pbrook
    av = float64_val(a);
3658 a1b91bb4 pbrook
    bv = float64_val(b);
3659 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
3660 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3661 158142c2 bellard
3662 158142c2 bellard
}
3663 158142c2 bellard
3664 158142c2 bellard
/*----------------------------------------------------------------------------
3665 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3666 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
3667 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
3668 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
3669 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3670 67b7861d Aurelien Jarno
3671 67b7861d Aurelien Jarno
int float64_unordered( float64 a, float64 b STATUS_PARAM )
3672 67b7861d Aurelien Jarno
{
3673 67b7861d Aurelien Jarno
    a = float64_squash_input_denormal(a STATUS_VAR);
3674 67b7861d Aurelien Jarno
    b = float64_squash_input_denormal(b STATUS_VAR);
3675 67b7861d Aurelien Jarno
3676 67b7861d Aurelien Jarno
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3677 67b7861d Aurelien Jarno
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3678 67b7861d Aurelien Jarno
       ) {
3679 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
3680 67b7861d Aurelien Jarno
        return 1;
3681 67b7861d Aurelien Jarno
    }
3682 67b7861d Aurelien Jarno
    return 0;
3683 67b7861d Aurelien Jarno
}
3684 67b7861d Aurelien Jarno
3685 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3686 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3687 f5a64251 Aurelien Jarno
| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3688 f5a64251 Aurelien Jarno
| exception.The comparison is performed according to the IEC/IEEE Standard
3689 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
3690 158142c2 bellard
*----------------------------------------------------------------------------*/
3691 158142c2 bellard
3692 b689362d Aurelien Jarno
int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
3693 158142c2 bellard
{
3694 bb98fe42 Andreas Färber
    uint64_t av, bv;
3695 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3696 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3697 158142c2 bellard
3698 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3699 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3700 158142c2 bellard
       ) {
3701 b689362d Aurelien Jarno
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3702 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
3703 b689362d Aurelien Jarno
        }
3704 158142c2 bellard
        return 0;
3705 158142c2 bellard
    }
3706 f090c9d4 pbrook
    av = float64_val(a);
3707 a1b91bb4 pbrook
    bv = float64_val(b);
3708 bb98fe42 Andreas Färber
    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3709 158142c2 bellard
3710 158142c2 bellard
}
3711 158142c2 bellard
3712 158142c2 bellard
/*----------------------------------------------------------------------------
3713 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3714 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3715 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3716 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3717 158142c2 bellard
*----------------------------------------------------------------------------*/
3718 158142c2 bellard
3719 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3720 158142c2 bellard
{
3721 158142c2 bellard
    flag aSign, bSign;
3722 bb98fe42 Andreas Färber
    uint64_t av, bv;
3723 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3724 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3725 158142c2 bellard
3726 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3727 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3728 158142c2 bellard
       ) {
3729 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3730 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3731 158142c2 bellard
        }
3732 158142c2 bellard
        return 0;
3733 158142c2 bellard
    }
3734 158142c2 bellard
    aSign = extractFloat64Sign( a );
3735 158142c2 bellard
    bSign = extractFloat64Sign( b );
3736 f090c9d4 pbrook
    av = float64_val(a);
3737 a1b91bb4 pbrook
    bv = float64_val(b);
3738 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3739 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3740 158142c2 bellard
3741 158142c2 bellard
}
3742 158142c2 bellard
3743 158142c2 bellard
/*----------------------------------------------------------------------------
3744 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3745 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3746 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3747 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3748 158142c2 bellard
*----------------------------------------------------------------------------*/
3749 158142c2 bellard
3750 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3751 158142c2 bellard
{
3752 158142c2 bellard
    flag aSign, bSign;
3753 bb98fe42 Andreas Färber
    uint64_t av, bv;
3754 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3755 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3756 158142c2 bellard
3757 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3758 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3759 158142c2 bellard
       ) {
3760 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3761 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3762 158142c2 bellard
        }
3763 158142c2 bellard
        return 0;
3764 158142c2 bellard
    }
3765 158142c2 bellard
    aSign = extractFloat64Sign( a );
3766 158142c2 bellard
    bSign = extractFloat64Sign( b );
3767 f090c9d4 pbrook
    av = float64_val(a);
3768 a1b91bb4 pbrook
    bv = float64_val(b);
3769 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
3770 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3771 158142c2 bellard
3772 158142c2 bellard
}
3773 158142c2 bellard
3774 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3775 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3776 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
3777 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
3778 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
3779 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3780 67b7861d Aurelien Jarno
3781 67b7861d Aurelien Jarno
int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
3782 67b7861d Aurelien Jarno
{
3783 67b7861d Aurelien Jarno
    a = float64_squash_input_denormal(a STATUS_VAR);
3784 67b7861d Aurelien Jarno
    b = float64_squash_input_denormal(b STATUS_VAR);
3785 67b7861d Aurelien Jarno
3786 67b7861d Aurelien Jarno
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3787 67b7861d Aurelien Jarno
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3788 67b7861d Aurelien Jarno
       ) {
3789 67b7861d Aurelien Jarno
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3790 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
3791 67b7861d Aurelien Jarno
        }
3792 67b7861d Aurelien Jarno
        return 1;
3793 67b7861d Aurelien Jarno
    }
3794 67b7861d Aurelien Jarno
    return 0;
3795 67b7861d Aurelien Jarno
}
3796 67b7861d Aurelien Jarno
3797 158142c2 bellard
#ifdef FLOATX80
3798 158142c2 bellard
3799 158142c2 bellard
/*----------------------------------------------------------------------------
3800 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3801 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3802 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3803 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3804 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3805 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3806 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3807 158142c2 bellard
*----------------------------------------------------------------------------*/
3808 158142c2 bellard
3809 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    int32 shift;

    /* A NaN (maximum exponent, nonzero fraction below the integer bit) is
     * treated as positive so that it ends up on the positive side. */
    if ((exp == 0x7FFF) && ((uint64_t)(sig << 1) != 0)) {
        sign = 0;
    }

    /* Never shift by less than one bit, even for huge exponents. */
    shift = 0x4037 - exp;
    if (shift < 1) {
        shift = 1;
    }
    shift64RightJamming(sig, shift, &sig);

    /* Rounding and range checking are delegated to roundAndPackInt32. */
    return roundAndPackInt32(sign, sig STATUS_VAR);
}
3825 158142c2 bellard
3826 158142c2 bellard
/*----------------------------------------------------------------------------
3827 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3828 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3829 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3830 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3831 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3832 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3833 158142c2 bellard
| sign as `a' is returned.
3834 158142c2 bellard
*----------------------------------------------------------------------------*/
3835 158142c2 bellard
3836 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    uint64_t truncated;
    int32 shift;
    int32 z;

    if (exp > 0x401E) {
        /* Too large for 32 bits (or Inf/NaN).  A NaN is reported as the
         * largest positive integer, so its sign is discarded first. */
        if ((exp == 0x7FFF) && ((uint64_t)(sig << 1) != 0)) {
            sign = 0;
        }
        goto overflow;
    }
    if (exp < 0x3FFF) {
        /* Magnitude below one: result is 0, inexact unless the input is
         * an exact zero. */
        if (exp || sig) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Drop the fraction bits, then apply the sign. */
    shift = 0x403E - exp;
    truncated = sig >> shift;
    z = truncated;
    if (sign) {
        z = - z;
    }
    /* A result whose sign disagrees with the input's sign overflowed. */
    if ((z < 0) ^ sign) {
        goto overflow;
    }
    /* Any bit lost by the right shift makes the conversion inexact. */
    if ((truncated << shift) != sig) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

 overflow:
    float_raise(float_flag_invalid STATUS_VAR);
    return sign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
}
3870 158142c2 bellard
3871 158142c2 bellard
/*----------------------------------------------------------------------------
3872 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3873 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3874 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3875 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3876 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3877 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3878 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3879 158142c2 bellard
*----------------------------------------------------------------------------*/
3880 158142c2 bellard
3881 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    uint64_t sigExtra;
    int32 shift = 0x403E - exp;

    if (shift < 0) {
        /* Exponent above 0x403E: out of range, or Inf/NaN. */
        float_raise(float_flag_invalid STATUS_VAR);
        if (!sign) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        /* A negative-signed NaN also yields the largest positive value;
         * only the infinity encoding keeps the negative result. */
        if ((exp == 0x7FFF) && (sig != LIT64( 0x8000000000000000 ))) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }

    if (shift == 0) {
        /* Significand is already aligned; no bits fall off the end. */
        sigExtra = 0;
    } else {
        shift64ExtraRightJamming(sig, 0, shift, &sig, &sigExtra);
    }
    return roundAndPackInt64(sign, sig, sigExtra STATUS_VAR);
}
3910 158142c2 bellard
3911 158142c2 bellard
/*----------------------------------------------------------------------------
3912 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3913 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3914 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3915 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3916 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3917 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3918 158142c2 bellard
| sign as `a' is returned.
3919 158142c2 bellard
*----------------------------------------------------------------------------*/
3920 158142c2 bellard
3921 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    int32 shift = exp - 0x403E;
    uint64_t fraction;
    int64 z;

    if (shift >= 0) {
        /* Only the bits below the explicit integer bit matter here. */
        fraction = sig & LIT64( 0x7FFFFFFFFFFFFFFF );
        /* Sign set, exponent 0x403E, zero fraction (-2^63 for a normalized
         * input) is the single encoding in this range that is returned
         * without raising invalid. */
        if ((a.high == 0xC03E) && (fraction == 0)) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        float_raise(float_flag_invalid STATUS_VAR);
        if (!sign || ((exp == 0x7FFF) && fraction)) {
            /* Positive overflow, or any NaN. */
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }
    if (exp < 0x3FFF) {
        /* Magnitude below one: result is 0, inexact unless exactly zero. */
        if ((exp != 0) || (sig != 0)) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* shift is in [-63, -1] here: keep the integer part ... */
    z = sig >> (- shift);
    /* ... and flag inexact if any of the discarded low bits were set. */
    if ((uint64_t)(sig << (shift & 63)) != 0) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - z : z;
}
3954 158142c2 bellard
3955 158142c2 bellard
/*----------------------------------------------------------------------------
3956 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3957 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3958 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3959 158142c2 bellard
| Floating-Point Arithmetic.
3960 158142c2 bellard
*----------------------------------------------------------------------------*/
3961 158142c2 bellard
3962 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);

    if (exp == 0x7FFF) {
        if ((uint64_t)(sig << 1) == 0) {
            /* Infinity maps to the infinity of the same sign. */
            return packFloat32(sign, 0xFF, 0);
        }
        /* NaN: convert through the common NaN representation. */
        return commonNaNToFloat32(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    /* Narrow the significand by 33 bits; shifted-out bits are jammed into
     * the lowest bit so they still influence rounding. */
    shift64RightJamming(sig, 33, &sig);
    /* Rebias the exponent unless the value is an exact zero. */
    if (exp || sig) {
        exp -= 0x3F81;
    }
    return roundAndPackFloat32(sign, exp, sig STATUS_VAR);
}
3982 158142c2 bellard
3983 158142c2 bellard
/*----------------------------------------------------------------------------
3984 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3985 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3986 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3987 158142c2 bellard
| Floating-Point Arithmetic.
3988 158142c2 bellard
*----------------------------------------------------------------------------*/
3989 158142c2 bellard
3990 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int32 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    uint64_t narrowed;

    if (exp == 0x7FFF) {
        if ((uint64_t)(sig << 1) == 0) {
            /* Infinity maps to the infinity of the same sign. */
            return packFloat64(sign, 0x7FF, 0);
        }
        /* NaN: convert through the common NaN representation. */
        return commonNaNToFloat64(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    /* Shift right by one bit, jamming the lost bit into the lowest bit. */
    shift64RightJamming(sig, 1, &narrowed);
    /* Rebias the exponent unless the input is an exact zero; the test uses
     * the unshifted significand. */
    if (exp || sig) {
        exp -= 0x3C01;
    }
    return roundAndPackFloat64(sign, exp, narrowed STATUS_VAR);
}
4010 158142c2 bellard
4011 158142c2 bellard
#ifdef FLOAT128
4012 158142c2 bellard
4013 158142c2 bellard
/*----------------------------------------------------------------------------
4014 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4015 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
4016 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4017 158142c2 bellard
| Floating-Point Arithmetic.
4018 158142c2 bellard
*----------------------------------------------------------------------------*/
4019 158142c2 bellard
4020 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac(a);
    int16 exp = extractFloatx80Exp(a);
    flag sign = extractFloatx80Sign(a);
    uint64_t hi, lo;

    /* NaN: convert through the common NaN representation. */
    if ((exp == 0x7FFF) && ((uint64_t)(sig << 1) != 0)) {
        return commonNaNToFloat128(floatx80ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    /* Discard the explicit integer bit (sig << 1), then shift the remaining
     * fraction right by 16 bits across the 128-bit pair.  The exponent is
     * passed through unchanged. */
    shift128Right(sig << 1, 0, 16, &hi, &lo);
    return packFloat128(sign, exp, hi, lo);
}
4036 158142c2 bellard
4037 158142c2 bellard
#endif
4038 158142c2 bellard
4039 158142c2 bellard
/*----------------------------------------------------------------------------
4040 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
4041 158142c2 bellard
| and returns the result as an extended double-precision floating-point
4042 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
4043 158142c2 bellard
| Binary Floating-Point Arithmetic.
4044 158142c2 bellard
*----------------------------------------------------------------------------*/
4045 158142c2 bellard
4046 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    uint64_t lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;

    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* At this exponent the lowest significand bit already has integer
         * weight, so there is nothing to round.  NaNs are propagated
         * (which may raise invalid); everything else is returned as is. */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* Magnitude below one.  Only an exact zero is returned unchanged.
         * The whole significand must be tested, including the explicit
         * integer bit: with a zero exponent and only that bit set the
         * input is a pseudo-denormal, a tiny nonzero value that still has
         * to be rounded and flagged inexact. */
        if (    ( aExp == 0 )
             && ( extractFloatx80Frac( a ) == 0 ) ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        aSign = extractFloatx80Sign( a );
        switch ( STATUS(float_rounding_mode) ) {
         case float_round_nearest_even:
            /* Strictly between 1/2 and 1: rounds to +/-1.  Exactly 1/2
             * falls through and rounds to the even neighbour, zero. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        /* Round-to-zero, and the nearest-even cases not handled above. */
        return packFloatx80( aSign, 0, 0 );
    }
    /* 1 <= |a| < 2^63: lastBitMask selects the significand bit of weight
     * one, roundBitsMask all the fraction bits below it. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = STATUS(float_rounding_mode);
    if ( roundingMode == float_round_nearest_even ) {
        /* Add one half; on an exact tie clear the last bit to make the
         * result even. */
        z.low += lastBitMask>>1;
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude only when rounding away
         * from zero for this sign. */
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z.low += roundBitsMask;
        }
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        /* The increment carried out of the significand: renormalize to
         * the next power of two. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;

}
4111 158142c2 bellard
4112 158142c2 bellard
/*----------------------------------------------------------------------------
4113 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
4114 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
4115 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
4116 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4117 158142c2 bellard
| Floating-Point Arithmetic.
4118 158142c2 bellard
*----------------------------------------------------------------------------*/
4119 158142c2 bellard
4120 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent: align `b' to it. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* A zero exponent field is scaled like exponent 1, so the
         * alignment shift is one less. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: align `a' to it. */
        if ( bExp == 0x7FFF ) {
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ( zSig0 < aSig ) {
                /* The 64-bit sum wrapped around.  This needs an operand
                 * with the explicit integer bit set despite a zero
                 * exponent (a pseudo-denormal).  Both operands are scaled
                 * like exponent 1, so restore the carry through the
                 * common shift-right path from there. */
                zExp = 1;
                goto shiftRight1;
            }
            if ( zSig0 == 0 ) {
                /* 0 + 0: return a zero of the requested sign directly.
                 * A zero significand must not be normalized, as that
                 * would require an out-of-range shift. */
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set: the sum is already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Shift the sum right by one, reinstate the top bit and bump the
     * exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
4177 158142c2 bellard
4178 158142c2 bellard
/*----------------------------------------------------------------------------
4179 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
4180 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
4181 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4182 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4183 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4184 158142c2 bellard
*----------------------------------------------------------------------------*/
4185 158142c2 bellard
4186 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
{
    uint64_t aSig = extractFloatx80Frac(a);
    int32 aExp = extractFloatx80Exp(a);
    uint64_t bSig = extractFloatx80Frac(b);
    int32 bExp = extractFloatx80Exp(b);
    int32 expDiff = aExp - bExp;
    uint64_t zSig0, zSig1;
    int32 zExp;
    int bIsLarger;   /* nonzero: compute |b| - |a| and flip the result sign */
    floatx80 z;

    if (expDiff > 0) {
        /* |a| has the larger exponent. */
        if (aExp == 0x7FFF) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b STATUS_VAR);
            }
            return a;
        }
        /* A zero exponent field is scaled like exponent 1. */
        if (bExp == 0) {
            --expDiff;
        }
        shift128RightJamming(bSig, 0, expDiff, &bSig, &zSig1);
        bIsLarger = 0;
    } else if (expDiff < 0) {
        /* |b| has the larger exponent. */
        if (bExp == 0x7FFF) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b STATUS_VAR);
            }
            /* finite - infinity: infinity with the opposite sign */
            return packFloatx80(zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ));
        }
        if (aExp == 0) {
            ++expDiff;
        }
        shift128RightJamming(aSig, 0, - expDiff, &aSig, &zSig1);
        bIsLarger = 1;
    } else {
        /* Equal exponents. */
        if (aExp == 0x7FFF) {
            if ((uint64_t)((aSig | bSig) << 1)) {
                return propagateFloatx80NaN(a, b STATUS_VAR);
            }
            /* infinity - infinity is invalid: return the default NaN. */
            float_raise(float_flag_invalid STATUS_VAR);
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        if (aExp == 0) {
            aExp = 1;
            bExp = 1;
        }
        zSig1 = 0;
        if (aSig == bSig) {
            /* Exact cancellation: the zero is negative only when rounding
             * down. */
            return packFloatx80(
                STATUS(float_rounding_mode) == float_round_down, 0, 0);
        }
        bIsLarger = (aSig < bSig);
    }

    /* Subtract the smaller magnitude from the larger one. */
    if (bIsLarger) {
        sub128(bSig, 0, aSig, zSig1, &zSig0, &zSig1);
        zExp = bExp;
        zSign ^= 1;
    } else {
        sub128(aSig, 0, bSig, zSig1, &zSig0, &zSig1);
        zExp = aExp;
    }
    return normalizeRoundAndPackFloatx80(
        STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR);
}
4245 158142c2 bellard
4246 158142c2 bellard
/*----------------------------------------------------------------------------
4247 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
4248 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4249 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4250 158142c2 bellard
*----------------------------------------------------------------------------*/
4251 158142c2 bellard
4252 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign(a);
    flag bSign = extractFloatx80Sign(b);

    /* Operands of opposite sign: the sum is a difference of magnitudes. */
    if (aSign != bSign) {
        return subFloatx80Sigs(a, b, aSign STATUS_VAR);
    }
    return addFloatx80Sigs(a, b, aSign STATUS_VAR);
}
4266 158142c2 bellard
4267 158142c2 bellard
/*----------------------------------------------------------------------------
4268 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
4269 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4270 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271 158142c2 bellard
*----------------------------------------------------------------------------*/
4272 158142c2 bellard
4273 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign(a);
    flag bSign = extractFloatx80Sign(b);

    /* Operands of opposite sign: the difference is a sum of magnitudes. */
    if (aSign != bSign) {
        return addFloatx80Sigs(a, b, aSign STATUS_VAR);
    }
    return subFloatx80Sigs(a, b, aSign STATUS_VAR);
}
4287 158142c2 bellard
4288 158142c2 bellard
/*----------------------------------------------------------------------------
4289 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
4290 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4291 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4292 158142c2 bellard
*----------------------------------------------------------------------------*/
4293 158142c2 bellard
4294 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
{
    uint64_t aSig = extractFloatx80Frac(a);
    int32 aExp = extractFloatx80Exp(a);
    flag aSign = extractFloatx80Sign(a);
    uint64_t bSig = extractFloatx80Frac(b);
    int32 bExp = extractFloatx80Exp(b);
    flag bSign = extractFloatx80Sign(b);
    flag zSign = aSign ^ bSign;
    uint64_t zSig0, zSig1;
    int32 zExp;
    floatx80 z;

    if (aExp == 0x7FFF) {
        /* `a' is Inf or NaN. */
        if (    (uint64_t)(aSig << 1)
             || ((bExp == 0x7FFF) && (uint64_t)(bSig << 1))) {
            return propagateFloatx80NaN(a, b STATUS_VAR);
        }
        if ((bExp | bSig) == 0) {
            goto infTimesZero;
        }
        return packFloatx80(zSign, 0x7FFF, LIT64( 0x8000000000000000 ));
    }
    if (bExp == 0x7FFF) {
        /* `b' is Inf or NaN, `a' is finite. */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b STATUS_VAR);
        }
        if ((aExp | aSig) == 0) {
            goto infTimesZero;
        }
        return packFloatx80(zSign, 0x7FFF, LIT64( 0x8000000000000000 ));
    }

    /* A zero factor gives a signed zero; subnormals are normalized first. */
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(zSign, 0, 0);
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }
    if (bExp == 0) {
        if (bSig == 0) {
            return packFloatx80(zSign, 0, 0);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }

    zExp = aExp + bExp - 0x3FFE;
    mul64To128(aSig, bSig, &zSig0, &zSig1);
    /* Top bit of the product clear: shift left by one to normalize. */
    if ((int64_t) zSig0 > 0) {
        shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
        --zExp;
    }
    return roundAndPackFloatx80(
        STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR);

 infTimesZero:
    /* infinity * zero is invalid: return the default NaN. */
    float_raise(float_flag_invalid STATUS_VAR);
    z.low = floatx80_default_nan_low;
    z.high = floatx80_default_nan_high;
    return z;
}
4346 158142c2 bellard
4347 158142c2 bellard
/*----------------------------------------------------------------------------
4348 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
4349 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
4350 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4351 158142c2 bellard
*----------------------------------------------------------------------------*/
4352 158142c2 bellard
4353 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4354 158142c2 bellard
{
4355 158142c2 bellard
    flag aSign, bSign, zSign;
4356 158142c2 bellard
    int32 aExp, bExp, zExp;
4357 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4358 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, term0, term1, term2;
4359 158142c2 bellard
    floatx80 z;
4360 158142c2 bellard
4361 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4362 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4363 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4364 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4365 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4366 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4367 158142c2 bellard
    zSign = aSign ^ bSign;
4368 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4369 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4370 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4371 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4372 158142c2 bellard
            goto invalid;
4373 158142c2 bellard
        }
4374 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4375 158142c2 bellard
    }
4376 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4377 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4378 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
4379 158142c2 bellard
    }
4380 158142c2 bellard
    if ( bExp == 0 ) {
4381 158142c2 bellard
        if ( bSig == 0 ) {
4382 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
4383 158142c2 bellard
 invalid:
4384 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4385 158142c2 bellard
                z.low = floatx80_default_nan_low;
4386 158142c2 bellard
                z.high = floatx80_default_nan_high;
4387 158142c2 bellard
                return z;
4388 158142c2 bellard
            }
4389 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4390 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4391 158142c2 bellard
        }
4392 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4393 158142c2 bellard
    }
4394 158142c2 bellard
    if ( aExp == 0 ) {
4395 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4396 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4397 158142c2 bellard
    }
4398 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
4399 158142c2 bellard
    rem1 = 0;
4400 158142c2 bellard
    if ( bSig <= aSig ) {
4401 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4402 158142c2 bellard
        ++zExp;
4403 158142c2 bellard
    }
4404 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4405 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4406 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4407 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4408 158142c2 bellard
        --zSig0;
4409 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4410 158142c2 bellard
    }
4411 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4412 bb98fe42 Andreas Färber
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4413 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4414 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4415 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4416 158142c2 bellard
            --zSig1;
4417 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4418 158142c2 bellard
        }
4419 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4420 158142c2 bellard
    }
4421 158142c2 bellard
    return
4422 158142c2 bellard
        roundAndPackFloatx80(
4423 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4424 158142c2 bellard
4425 158142c2 bellard
}
4426 158142c2 bellard
4427 158142c2 bellard
/*----------------------------------------------------------------------------
4428 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4429 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4430 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4431 158142c2 bellard
*----------------------------------------------------------------------------*/
4432 158142c2 bellard
4433 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4434 158142c2 bellard
{
4435 ed086f3d Blue Swirl
    flag aSign, zSign;
4436 158142c2 bellard
    int32 aExp, bExp, expDiff;
4437 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig;
4438 bb98fe42 Andreas Färber
    uint64_t q, term0, term1, alternateASig0, alternateASig1;
4439 158142c2 bellard
    floatx80 z;
4440 158142c2 bellard
4441 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4442 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4443 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4444 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4445 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4446 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4447 bb98fe42 Andreas Färber
        if (    (uint64_t) ( aSig0<<1 )
4448 bb98fe42 Andreas Färber
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4449 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4450 158142c2 bellard
        }
4451 158142c2 bellard
        goto invalid;
4452 158142c2 bellard
    }
4453 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4454 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4455 158142c2 bellard
        return a;
4456 158142c2 bellard
    }
4457 158142c2 bellard
    if ( bExp == 0 ) {
4458 158142c2 bellard
        if ( bSig == 0 ) {
4459 158142c2 bellard
 invalid:
4460 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4461 158142c2 bellard
            z.low = floatx80_default_nan_low;
4462 158142c2 bellard
            z.high = floatx80_default_nan_high;
4463 158142c2 bellard
            return z;
4464 158142c2 bellard
        }
4465 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4466 158142c2 bellard
    }
4467 158142c2 bellard
    if ( aExp == 0 ) {
4468 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
4469 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4470 158142c2 bellard
    }
4471 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
4472 158142c2 bellard
    zSign = aSign;
4473 158142c2 bellard
    expDiff = aExp - bExp;
4474 158142c2 bellard
    aSig1 = 0;
4475 158142c2 bellard
    if ( expDiff < 0 ) {
4476 158142c2 bellard
        if ( expDiff < -1 ) return a;
4477 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4478 158142c2 bellard
        expDiff = 0;
4479 158142c2 bellard
    }
4480 158142c2 bellard
    q = ( bSig <= aSig0 );
4481 158142c2 bellard
    if ( q ) aSig0 -= bSig;
4482 158142c2 bellard
    expDiff -= 64;
4483 158142c2 bellard
    while ( 0 < expDiff ) {
4484 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4485 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4486 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
4487 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4488 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4489 158142c2 bellard
        expDiff -= 62;
4490 158142c2 bellard
    }
4491 158142c2 bellard
    expDiff += 64;
4492 158142c2 bellard
    if ( 0 < expDiff ) {
4493 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4494 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4495 158142c2 bellard
        q >>= 64 - expDiff;
4496 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4497 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4498 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4499 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4500 158142c2 bellard
            ++q;
4501 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4502 158142c2 bellard
        }
4503 158142c2 bellard
    }
4504 158142c2 bellard
    else {
4505 158142c2 bellard
        term1 = 0;
4506 158142c2 bellard
        term0 = bSig;
4507 158142c2 bellard
    }
4508 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4509 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4510 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4511 158142c2 bellard
              && ( q & 1 ) )
4512 158142c2 bellard
       ) {
4513 158142c2 bellard
        aSig0 = alternateASig0;
4514 158142c2 bellard
        aSig1 = alternateASig1;
4515 158142c2 bellard
        zSign = ! zSign;
4516 158142c2 bellard
    }
4517 158142c2 bellard
    return
4518 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4519 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4520 158142c2 bellard
4521 158142c2 bellard
}
4522 158142c2 bellard
4523 158142c2 bellard
/*----------------------------------------------------------------------------
4524 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4525 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4526 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4527 158142c2 bellard
*----------------------------------------------------------------------------*/
4528 158142c2 bellard
4529 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4530 158142c2 bellard
{
4531 158142c2 bellard
    flag aSign;
4532 158142c2 bellard
    int32 aExp, zExp;
4533 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4534 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4535 158142c2 bellard
    floatx80 z;
4536 158142c2 bellard
4537 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4538 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4539 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4540 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4541 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4542 158142c2 bellard
        if ( ! aSign ) return a;
4543 158142c2 bellard
        goto invalid;
4544 158142c2 bellard
    }
4545 158142c2 bellard
    if ( aSign ) {
4546 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4547 158142c2 bellard
 invalid:
4548 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4549 158142c2 bellard
        z.low = floatx80_default_nan_low;
4550 158142c2 bellard
        z.high = floatx80_default_nan_high;
4551 158142c2 bellard
        return z;
4552 158142c2 bellard
    }
4553 158142c2 bellard
    if ( aExp == 0 ) {
4554 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4555 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4556 158142c2 bellard
    }
4557 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4558 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4559 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4560 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4561 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4562 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4563 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4564 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4565 158142c2 bellard
        --zSig0;
4566 158142c2 bellard
        doubleZSig0 -= 2;
4567 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4568 158142c2 bellard
    }
4569 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4570 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4571 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4572 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4573 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4574 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4575 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4576 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4577 158142c2 bellard
            --zSig1;
4578 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4579 158142c2 bellard
            term3 |= 1;
4580 158142c2 bellard
            term2 |= doubleZSig0;
4581 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4582 158142c2 bellard
        }
4583 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4584 158142c2 bellard
    }
4585 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4586 158142c2 bellard
    zSig0 |= doubleZSig0;
4587 158142c2 bellard
    return
4588 158142c2 bellard
        roundAndPackFloatx80(
4589 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4590 158142c2 bellard
4591 158142c2 bellard
}
4592 158142c2 bellard
4593 158142c2 bellard
/*----------------------------------------------------------------------------
4594 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is equal
4595 b689362d Aurelien Jarno
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4596 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4597 b689362d Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4598 158142c2 bellard
*----------------------------------------------------------------------------*/
4599 158142c2 bellard
4600 b689362d Aurelien Jarno
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4601 158142c2 bellard
{
4602 158142c2 bellard
4603 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4604 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4605 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4606 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4607 158142c2 bellard
       ) {
4608 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4609 158142c2 bellard
        return 0;
4610 158142c2 bellard
    }
4611 158142c2 bellard
    return
4612 158142c2 bellard
           ( a.low == b.low )
4613 158142c2 bellard
        && (    ( a.high == b.high )
4614 158142c2 bellard
             || (    ( a.low == 0 )
4615 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
4616 158142c2 bellard
           );
4617 158142c2 bellard
4618 158142c2 bellard
}
4619 158142c2 bellard
4620 158142c2 bellard
/*----------------------------------------------------------------------------
4621 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4622 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4623 f5a64251 Aurelien Jarno
| invalid exception is raised if either operand is a NaN.  The comparison is
4624 f5a64251 Aurelien Jarno
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4625 f5a64251 Aurelien Jarno
| Arithmetic.
4626 158142c2 bellard
*----------------------------------------------------------------------------*/
4627 158142c2 bellard
4628 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4629 158142c2 bellard
{
4630 158142c2 bellard
    flag aSign, bSign;
4631 158142c2 bellard
4632 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4633 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4634 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4635 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4636 158142c2 bellard
       ) {
4637 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4638 158142c2 bellard
        return 0;
4639 158142c2 bellard
    }
4640 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4641 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4642 158142c2 bellard
    if ( aSign != bSign ) {
4643 158142c2 bellard
        return
4644 158142c2 bellard
               aSign
4645 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4646 158142c2 bellard
                 == 0 );
4647 158142c2 bellard
    }
4648 158142c2 bellard
    return
4649 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4650 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4651 158142c2 bellard
4652 158142c2 bellard
}
4653 158142c2 bellard
4654 158142c2 bellard
/*----------------------------------------------------------------------------
4655 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4656 f5a64251 Aurelien Jarno
| less than the corresponding value `b', and 0 otherwise.  The invalid
4657 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
4658 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4659 158142c2 bellard
*----------------------------------------------------------------------------*/
4660 158142c2 bellard
4661 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4662 158142c2 bellard
{
4663 158142c2 bellard
    flag aSign, bSign;
4664 158142c2 bellard
4665 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4666 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4667 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4668 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4669 158142c2 bellard
       ) {
4670 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4671 158142c2 bellard
        return 0;
4672 158142c2 bellard
    }
4673 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4674 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4675 158142c2 bellard
    if ( aSign != bSign ) {
4676 158142c2 bellard
        return
4677 158142c2 bellard
               aSign
4678 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4679 158142c2 bellard
                 != 0 );
4680 158142c2 bellard
    }
4681 158142c2 bellard
    return
4682 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4683 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4684 158142c2 bellard
4685 158142c2 bellard
}
4686 158142c2 bellard
4687 158142c2 bellard
/*----------------------------------------------------------------------------
4688 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4689 f5a64251 Aurelien Jarno
| cannot be compared, and 0 otherwise.  The invalid exception is raised if
4690 f5a64251 Aurelien Jarno
| either operand is a NaN.   The comparison is performed according to the
4691 f5a64251 Aurelien Jarno
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4692 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4693 67b7861d Aurelien Jarno
int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
4694 67b7861d Aurelien Jarno
{
4695 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4696 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4697 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4698 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4699 67b7861d Aurelien Jarno
       ) {
4700 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4701 67b7861d Aurelien Jarno
        return 1;
4702 67b7861d Aurelien Jarno
    }
4703 67b7861d Aurelien Jarno
    return 0;
4704 67b7861d Aurelien Jarno
}
4705 67b7861d Aurelien Jarno
4706 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4707 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is
4708 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
4709 f5a64251 Aurelien Jarno
| cause an exception.  The comparison is performed according to the IEC/IEEE
4710 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
4711 158142c2 bellard
*----------------------------------------------------------------------------*/
4712 158142c2 bellard
4713 b689362d Aurelien Jarno
int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4714 158142c2 bellard
{
4715 158142c2 bellard
4716 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4717 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4718 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4719 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4720 158142c2 bellard
       ) {
4721 b689362d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4722 b689362d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4723 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4724 b689362d Aurelien Jarno
        }
4725 158142c2 bellard
        return 0;
4726 158142c2 bellard
    }
4727 158142c2 bellard
    return
4728 158142c2 bellard
           ( a.low == b.low )
4729 158142c2 bellard
        && (    ( a.high == b.high )
4730 158142c2 bellard
             || (    ( a.low == 0 )
4731 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
4732 158142c2 bellard
           );
4733 158142c2 bellard
4734 158142c2 bellard
}
4735 158142c2 bellard
4736 158142c2 bellard
/*----------------------------------------------------------------------------
4737 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4738 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4739 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4740 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4741 158142c2 bellard
*----------------------------------------------------------------------------*/
4742 158142c2 bellard
4743 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4744 158142c2 bellard
{
4745 158142c2 bellard
    flag aSign, bSign;
4746 158142c2 bellard
4747 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4748 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4749 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4750 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4751 158142c2 bellard
       ) {
4752 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4753 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4754 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4755 158142c2 bellard
        }
4756 158142c2 bellard
        return 0;
4757 158142c2 bellard
    }
4758 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4759 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4760 158142c2 bellard
    if ( aSign != bSign ) {
4761 158142c2 bellard
        return
4762 158142c2 bellard
               aSign
4763 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4764 158142c2 bellard
                 == 0 );
4765 158142c2 bellard
    }
4766 158142c2 bellard
    return
4767 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4768 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4769 158142c2 bellard
4770 158142c2 bellard
}
4771 158142c2 bellard
4772 158142c2 bellard
/*----------------------------------------------------------------------------
4773 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4774 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4775 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4776 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4777 158142c2 bellard
*----------------------------------------------------------------------------*/
4778 158142c2 bellard
4779 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4780 158142c2 bellard
{
4781 158142c2 bellard
    flag aSign, bSign;
4782 158142c2 bellard
4783 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4784 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4785 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4786 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4787 158142c2 bellard
       ) {
4788 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4789 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4790 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4791 158142c2 bellard
        }
4792 158142c2 bellard
        return 0;
4793 158142c2 bellard
    }
4794 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4795 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4796 158142c2 bellard
    if ( aSign != bSign ) {
4797 158142c2 bellard
        return
4798 158142c2 bellard
               aSign
4799 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4800 158142c2 bellard
                 != 0 );
4801 158142c2 bellard
    }
4802 158142c2 bellard
    return
4803 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4804 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4805 158142c2 bellard
4806 158142c2 bellard
}
4807 158142c2 bellard
4808 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4809 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4810 67b7861d Aurelien Jarno
| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
4811 67b7861d Aurelien Jarno
| The comparison is performed according to the IEC/IEEE Standard for Binary
4812 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
4813 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4814 67b7861d Aurelien Jarno
int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4815 67b7861d Aurelien Jarno
{
4816 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4817 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4818 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4819 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4820 67b7861d Aurelien Jarno
       ) {
4821 67b7861d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4822 67b7861d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4823 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4824 67b7861d Aurelien Jarno
        }
4825 67b7861d Aurelien Jarno
        return 1;
4826 67b7861d Aurelien Jarno
    }
4827 67b7861d Aurelien Jarno
    return 0;
4828 67b7861d Aurelien Jarno
}
4829 67b7861d Aurelien Jarno
4830 158142c2 bellard
#endif
4831 158142c2 bellard
4832 158142c2 bellard
#ifdef FLOAT128
4833 158142c2 bellard
4834 158142c2 bellard
/*----------------------------------------------------------------------------
4835 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4836 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4837 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4838 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4839 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4840 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4841 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4842 158142c2 bellard
*----------------------------------------------------------------------------*/
4843 158142c2 bellard
4844 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4845 158142c2 bellard
{
4846 158142c2 bellard
    flag aSign;
4847 158142c2 bellard
    int32 aExp, shiftCount;
4848 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4849 158142c2 bellard
4850 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4851 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4852 158142c2 bellard
    aExp = extractFloat128Exp( a );
4853 158142c2 bellard
    aSign = extractFloat128Sign( a );
4854 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4855 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4856 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4857 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4858 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4859 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4860 158142c2 bellard
4861 158142c2 bellard
}
4862 158142c2 bellard
4863 158142c2 bellard
/*----------------------------------------------------------------------------
4864 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4865 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4866 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4867 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4868 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4869 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4870 158142c2 bellard
| returned.
4871 158142c2 bellard
*----------------------------------------------------------------------------*/
4872 158142c2 bellard
4873 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4874 158142c2 bellard
{
4875 158142c2 bellard
    flag aSign;
4876 158142c2 bellard
    int32 aExp, shiftCount;
4877 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, savedASig;
4878 158142c2 bellard
    int32 z;
4879 158142c2 bellard
4880 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4881 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4882 158142c2 bellard
    aExp = extractFloat128Exp( a );
4883 158142c2 bellard
    aSign = extractFloat128Sign( a );
4884 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4885 158142c2 bellard
    if ( 0x401E < aExp ) {
4886 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4887 158142c2 bellard
        goto invalid;
4888 158142c2 bellard
    }
4889 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4890 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4891 158142c2 bellard
        return 0;
4892 158142c2 bellard
    }
4893 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4894 158142c2 bellard
    shiftCount = 0x402F - aExp;
4895 158142c2 bellard
    savedASig = aSig0;
4896 158142c2 bellard
    aSig0 >>= shiftCount;
4897 158142c2 bellard
    z = aSig0;
4898 158142c2 bellard
    if ( aSign ) z = - z;
4899 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4900 158142c2 bellard
 invalid:
4901 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4902 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4903 158142c2 bellard
    }
4904 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4905 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4906 158142c2 bellard
    }
4907 158142c2 bellard
    return z;
4908 158142c2 bellard
4909 158142c2 bellard
}
4910 158142c2 bellard
4911 158142c2 bellard
/*----------------------------------------------------------------------------
4912 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4913 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4914 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4915 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4916 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4917 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4918 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4919 158142c2 bellard
*----------------------------------------------------------------------------*/
4920 158142c2 bellard
4921 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4922 158142c2 bellard
{
4923 158142c2 bellard
    flag aSign;
4924 158142c2 bellard
    int32 aExp, shiftCount;
4925 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4926 158142c2 bellard
4927 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4928 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4929 158142c2 bellard
    aExp = extractFloat128Exp( a );
4930 158142c2 bellard
    aSign = extractFloat128Sign( a );
4931 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4932 158142c2 bellard
    shiftCount = 0x402F - aExp;
4933 158142c2 bellard
    if ( shiftCount <= 0 ) {
4934 158142c2 bellard
        if ( 0x403E < aExp ) {
4935 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4936 158142c2 bellard
            if (    ! aSign
4937 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4938 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4939 158142c2 bellard
                    )
4940 158142c2 bellard
               ) {
4941 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4942 158142c2 bellard
            }
4943 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
4944 158142c2 bellard
        }
4945 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4946 158142c2 bellard
    }
4947 158142c2 bellard
    else {
4948 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4949 158142c2 bellard
    }
4950 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4951 158142c2 bellard
4952 158142c2 bellard
}
4953 158142c2 bellard
4954 158142c2 bellard
/*----------------------------------------------------------------------------
4955 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4956 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4957 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4958 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4959 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4960 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4961 158142c2 bellard
| returned.
4962 158142c2 bellard
*----------------------------------------------------------------------------*/
4963 158142c2 bellard
4964 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4965 158142c2 bellard
{
4966 158142c2 bellard
    flag aSign;
4967 158142c2 bellard
    int32 aExp, shiftCount;
4968 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4969 158142c2 bellard
    int64 z;
4970 158142c2 bellard
4971 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4972 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4973 158142c2 bellard
    aExp = extractFloat128Exp( a );
4974 158142c2 bellard
    aSign = extractFloat128Sign( a );
4975 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4976 158142c2 bellard
    shiftCount = aExp - 0x402F;
4977 158142c2 bellard
    if ( 0 < shiftCount ) {
4978 158142c2 bellard
        if ( 0x403E <= aExp ) {
4979 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4980 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4981 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4982 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4983 158142c2 bellard
            }
4984 158142c2 bellard
            else {
4985 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4986 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4987 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4988 158142c2 bellard
                }
4989 158142c2 bellard
            }
4990 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
4991 158142c2 bellard
        }
4992 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4993 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
4994 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4995 158142c2 bellard
        }
4996 158142c2 bellard
    }
4997 158142c2 bellard
    else {
4998 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4999 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
5000 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
5001 158142c2 bellard
            }
5002 158142c2 bellard
            return 0;
5003 158142c2 bellard
        }
5004 158142c2 bellard
        z = aSig0>>( - shiftCount );
5005 158142c2 bellard
        if (    aSig1
5006 bb98fe42 Andreas Färber
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5007 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5008 158142c2 bellard
        }
5009 158142c2 bellard
    }
5010 158142c2 bellard
    if ( aSign ) z = - z;
5011 158142c2 bellard
    return z;
5012 158142c2 bellard
5013 158142c2 bellard
}
5014 158142c2 bellard
5015 158142c2 bellard
/*----------------------------------------------------------------------------
5016 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5017 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
5018 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5019 158142c2 bellard
| Arithmetic.
5020 158142c2 bellard
*----------------------------------------------------------------------------*/
5021 158142c2 bellard
5022 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
5023 158142c2 bellard
{
5024 158142c2 bellard
    flag aSign;
5025 158142c2 bellard
    int32 aExp;
5026 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5027 bb98fe42 Andreas Färber
    uint32_t zSig;
5028 158142c2 bellard
5029 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5030 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5031 158142c2 bellard
    aExp = extractFloat128Exp( a );
5032 158142c2 bellard
    aSign = extractFloat128Sign( a );
5033 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5034 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5035 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5036 158142c2 bellard
        }
5037 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
5038 158142c2 bellard
    }
5039 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5040 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
5041 158142c2 bellard
    zSig = aSig0;
5042 158142c2 bellard
    if ( aExp || zSig ) {
5043 158142c2 bellard
        zSig |= 0x40000000;
5044 158142c2 bellard
        aExp -= 0x3F81;
5045 158142c2 bellard
    }
5046 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5047 158142c2 bellard
5048 158142c2 bellard
}
5049 158142c2 bellard
5050 158142c2 bellard
/*----------------------------------------------------------------------------
5051 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5052 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
5053 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5054 158142c2 bellard
| Arithmetic.
5055 158142c2 bellard
*----------------------------------------------------------------------------*/
5056 158142c2 bellard
5057 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
5058 158142c2 bellard
{
5059 158142c2 bellard
    flag aSign;
5060 158142c2 bellard
    int32 aExp;
5061 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5062 158142c2 bellard
5063 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5064 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5065 158142c2 bellard
    aExp = extractFloat128Exp( a );
5066 158142c2 bellard
    aSign = extractFloat128Sign( a );
5067 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5068 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5069 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5070 158142c2 bellard
        }
5071 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
5072 158142c2 bellard
    }
5073 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5074 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5075 158142c2 bellard
    if ( aExp || aSig0 ) {
5076 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5077 158142c2 bellard
        aExp -= 0x3C01;
5078 158142c2 bellard
    }
5079 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5080 158142c2 bellard
5081 158142c2 bellard
}
5082 158142c2 bellard
5083 158142c2 bellard
#ifdef FLOATX80
5084 158142c2 bellard
5085 158142c2 bellard
/*----------------------------------------------------------------------------
5086 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5087 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
5088 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
5089 158142c2 bellard
| Floating-Point Arithmetic.
5090 158142c2 bellard
*----------------------------------------------------------------------------*/
5091 158142c2 bellard
5092 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5093 158142c2 bellard
{
5094 158142c2 bellard
    flag aSign;
5095 158142c2 bellard
    int32 aExp;
5096 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5097 158142c2 bellard
5098 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5099 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5100 158142c2 bellard
    aExp = extractFloat128Exp( a );
5101 158142c2 bellard
    aSign = extractFloat128Sign( a );
5102 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5103 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5104 bcd4d9af Christophe Lyon
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5105 158142c2 bellard
        }
5106 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5107 158142c2 bellard
    }
5108 158142c2 bellard
    if ( aExp == 0 ) {
5109 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5110 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5111 158142c2 bellard
    }
5112 158142c2 bellard
    else {
5113 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
5114 158142c2 bellard
    }
5115 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5116 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5117 158142c2 bellard
5118 158142c2 bellard
}
5119 158142c2 bellard
5120 158142c2 bellard
#endif
5121 158142c2 bellard
5122 158142c2 bellard
/*----------------------------------------------------------------------------
5123 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
5124 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
5125 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
5126 158142c2 bellard
| Floating-Point Arithmetic.
5127 158142c2 bellard
*----------------------------------------------------------------------------*/
5128 158142c2 bellard
5129 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
5130 158142c2 bellard
{
5131 158142c2 bellard
    flag aSign;
5132 158142c2 bellard
    int32 aExp;
5133 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
5134 158142c2 bellard
    int8 roundingMode;
5135 158142c2 bellard
    float128 z;
5136 158142c2 bellard
5137 158142c2 bellard
    aExp = extractFloat128Exp( a );
5138 158142c2 bellard
    if ( 0x402F <= aExp ) {
5139 158142c2 bellard
        if ( 0x406F <= aExp ) {
5140 158142c2 bellard
            if (    ( aExp == 0x7FFF )
5141 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5142 158142c2 bellard
               ) {
5143 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
5144 158142c2 bellard
            }
5145 158142c2 bellard
            return a;
5146 158142c2 bellard
        }
5147 158142c2 bellard
        lastBitMask = 1;
5148 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5149 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5150 158142c2 bellard
        z = a;
5151 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5152 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5153 158142c2 bellard
            if ( lastBitMask ) {
5154 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5155 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5156 158142c2 bellard
            }
5157 158142c2 bellard
            else {
5158 bb98fe42 Andreas Färber
                if ( (int64_t) z.low < 0 ) {
5159 158142c2 bellard
                    ++z.high;
5160 bb98fe42 Andreas Färber
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5161 158142c2 bellard
                }
5162 158142c2 bellard
            }
5163 158142c2 bellard
        }
5164 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5165 158142c2 bellard
            if (   extractFloat128Sign( z )
5166 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5167 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5168 158142c2 bellard
            }
5169 158142c2 bellard
        }
5170 158142c2 bellard
        z.low &= ~ roundBitsMask;
5171 158142c2 bellard
    }
5172 158142c2 bellard
    else {
5173 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5174 bb98fe42 Andreas Färber
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
5175 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5176 158142c2 bellard
            aSign = extractFloat128Sign( a );
5177 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
5178 158142c2 bellard
             case float_round_nearest_even:
5179 158142c2 bellard
                if (    ( aExp == 0x3FFE )
5180 158142c2 bellard
                     && (   extractFloat128Frac0( a )
5181 158142c2 bellard
                          | extractFloat128Frac1( a ) )
5182 158142c2 bellard
                   ) {
5183 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
5184 158142c2 bellard
                }
5185 158142c2 bellard
                break;
5186 158142c2 bellard
             case float_round_down:
5187 158142c2 bellard
                return
5188 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5189 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
5190 158142c2 bellard
             case float_round_up:
5191 158142c2 bellard
                return
5192 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
5193 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
5194 158142c2 bellard
            }
5195 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
5196 158142c2 bellard
        }
5197 158142c2 bellard
        lastBitMask = 1;
5198 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
5199 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5200 158142c2 bellard
        z.low = 0;
5201 158142c2 bellard
        z.high = a.high;
5202 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5203 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5204 158142c2 bellard
            z.high += lastBitMask>>1;
5205 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5206 158142c2 bellard
                z.high &= ~ lastBitMask;
5207 158142c2 bellard
            }
5208 158142c2 bellard
        }
5209 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5210 158142c2 bellard
            if (   extractFloat128Sign( z )
5211 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5212 158142c2 bellard
                z.high |= ( a.low != 0 );
5213 158142c2 bellard
                z.high += roundBitsMask;
5214 158142c2 bellard
            }
5215 158142c2 bellard
        }
5216 158142c2 bellard
        z.high &= ~ roundBitsMask;
5217 158142c2 bellard
    }
5218 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5219 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5220 158142c2 bellard
    }
5221 158142c2 bellard
    return z;
5222 158142c2 bellard
5223 158142c2 bellard
}
5224 158142c2 bellard
5225 158142c2 bellard
/*----------------------------------------------------------------------------
5226 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
5227 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
5228 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
5229 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
5230 158142c2 bellard
| Floating-Point Arithmetic.
5231 158142c2 bellard
*----------------------------------------------------------------------------*/
5232 158142c2 bellard
5233 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5234 158142c2 bellard
{
5235 158142c2 bellard
    int32 aExp, bExp, zExp;
5236 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5237 158142c2 bellard
    int32 expDiff;
5238 158142c2 bellard
5239 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5240 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5241 158142c2 bellard
    aExp = extractFloat128Exp( a );
5242 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5243 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5244 158142c2 bellard
    bExp = extractFloat128Exp( b );
5245 158142c2 bellard
    expDiff = aExp - bExp;
5246 158142c2 bellard
    if ( 0 < expDiff ) {
5247 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5248 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5249 158142c2 bellard
            return a;
5250 158142c2 bellard
        }
5251 158142c2 bellard
        if ( bExp == 0 ) {
5252 158142c2 bellard
            --expDiff;
5253 158142c2 bellard
        }
5254 158142c2 bellard
        else {
5255 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
5256 158142c2 bellard
        }
5257 158142c2 bellard
        shift128ExtraRightJamming(
5258 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5259 158142c2 bellard
        zExp = aExp;
5260 158142c2 bellard
    }
5261 158142c2 bellard
    else if ( expDiff < 0 ) {
5262 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5263 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5264 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5265 158142c2 bellard
        }
5266 158142c2 bellard
        if ( aExp == 0 ) {
5267 158142c2 bellard
            ++expDiff;
5268 158142c2 bellard
        }
5269 158142c2 bellard
        else {
5270 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
5271 158142c2 bellard
        }
5272 158142c2 bellard
        shift128ExtraRightJamming(
5273 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5274 158142c2 bellard
        zExp = bExp;
5275 158142c2 bellard
    }
5276 158142c2 bellard
    else {
5277 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5278 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5279 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
5280 158142c2 bellard
            }
5281 158142c2 bellard
            return a;
5282 158142c2 bellard
        }
5283 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5284 fe76d976 pbrook
        if ( aExp == 0 ) {
5285 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
5286 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
5287 fe76d976 pbrook
        }
5288 158142c2 bellard
        zSig2 = 0;
5289 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
5290 158142c2 bellard
        zExp = aExp;
5291 158142c2 bellard
        goto shiftRight1;
5292 158142c2 bellard
    }
5293 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5294 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5295 158142c2 bellard
    --zExp;
5296 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5297 158142c2 bellard
    ++zExp;
5298 158142c2 bellard
 shiftRight1:
5299 158142c2 bellard
    shift128ExtraRightJamming(
5300 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5301 158142c2 bellard
 roundAndPack:
5302 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5303 158142c2 bellard
5304 158142c2 bellard
}
5305 158142c2 bellard
5306 158142c2 bellard
/*----------------------------------------------------------------------------
5307 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
5308 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
5309 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
5310 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5311 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5312 158142c2 bellard
*----------------------------------------------------------------------------*/
5313 158142c2 bellard
5314 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5315 158142c2 bellard
{
5316 158142c2 bellard
    int32 aExp, bExp, zExp;
5317 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5318 158142c2 bellard
    int32 expDiff;
5319 158142c2 bellard
    float128 z;
5320 158142c2 bellard
5321 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5322 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5323 158142c2 bellard
    aExp = extractFloat128Exp( a );
5324 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5325 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5326 158142c2 bellard
    bExp = extractFloat128Exp( b );
5327 158142c2 bellard
    expDiff = aExp - bExp;
5328 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5329 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5330 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
5331 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
5332 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5333 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5334 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5335 158142c2 bellard
        }
5336 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5337 158142c2 bellard
        z.low = float128_default_nan_low;
5338 158142c2 bellard
        z.high = float128_default_nan_high;
5339 158142c2 bellard
        return z;
5340 158142c2 bellard
    }
5341 158142c2 bellard
    if ( aExp == 0 ) {
5342 158142c2 bellard
        aExp = 1;
5343 158142c2 bellard
        bExp = 1;
5344 158142c2 bellard
    }
5345 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
5346 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
5347 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
5348 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
5349 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5350 158142c2 bellard
 bExpBigger:
5351 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5352 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5353 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5354 158142c2 bellard
    }
5355 158142c2 bellard
    if ( aExp == 0 ) {
5356 158142c2 bellard
        ++expDiff;
5357 158142c2 bellard
    }
5358 158142c2 bellard
    else {
5359 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5360 158142c2 bellard
    }
5361 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5362 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
5363 158142c2 bellard
 bBigger:
5364 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5365 158142c2 bellard
    zExp = bExp;
5366 158142c2 bellard
    zSign ^= 1;
5367 158142c2 bellard
    goto normalizeRoundAndPack;
5368 158142c2 bellard
 aExpBigger:
5369 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5370 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5371 158142c2 bellard
        return a;
5372 158142c2 bellard
    }
5373 158142c2 bellard
    if ( bExp == 0 ) {
5374 158142c2 bellard
        --expDiff;
5375 158142c2 bellard
    }
5376 158142c2 bellard
    else {
5377 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
5378 158142c2 bellard
    }
5379 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5380 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
5381 158142c2 bellard
 aBigger:
5382 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5383 158142c2 bellard
    zExp = aExp;
5384 158142c2 bellard
 normalizeRoundAndPack:
5385 158142c2 bellard
    --zExp;
5386 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5387 158142c2 bellard
5388 158142c2 bellard
}
5389 158142c2 bellard
5390 158142c2 bellard
/*----------------------------------------------------------------------------
5391 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
5392 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
5393 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5394 158142c2 bellard
*----------------------------------------------------------------------------*/
5395 158142c2 bellard
5396 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
5397 158142c2 bellard
{
5398 158142c2 bellard
    flag aSign, bSign;
5399 158142c2 bellard
5400 158142c2 bellard
    aSign = extractFloat128Sign( a );
5401 158142c2 bellard
    bSign = extractFloat128Sign( b );
5402 158142c2 bellard
    if ( aSign == bSign ) {
5403 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5404 158142c2 bellard
    }
5405 158142c2 bellard
    else {
5406 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5407 158142c2 bellard
    }
5408 158142c2 bellard
5409 158142c2 bellard
}
5410 158142c2 bellard
5411 158142c2 bellard
/*----------------------------------------------------------------------------
5412 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
5413 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5414 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5415 158142c2 bellard
*----------------------------------------------------------------------------*/
5416 158142c2 bellard
5417 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5418 158142c2 bellard
{
5419 158142c2 bellard
    flag aSign, bSign;
5420 158142c2 bellard
5421 158142c2 bellard
    aSign = extractFloat128Sign( a );
5422 158142c2 bellard
    bSign = extractFloat128Sign( b );
5423 158142c2 bellard
    if ( aSign == bSign ) {
5424 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5425 158142c2 bellard
    }
5426 158142c2 bellard
    else {
5427 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5428 158142c2 bellard
    }
5429 158142c2 bellard
5430 158142c2 bellard
}
5431 158142c2 bellard
5432 158142c2 bellard
/*----------------------------------------------------------------------------
5433 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
5434 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5435 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5436 158142c2 bellard
*----------------------------------------------------------------------------*/
5437 158142c2 bellard
5438 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5439 158142c2 bellard
{
5440 158142c2 bellard
    flag aSign, bSign, zSign;
5441 158142c2 bellard
    int32 aExp, bExp, zExp;
5442 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5443 158142c2 bellard
    float128 z;
5444 158142c2 bellard
5445 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5446 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5447 158142c2 bellard
    aExp = extractFloat128Exp( a );
5448 158142c2 bellard
    aSign = extractFloat128Sign( a );
5449 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5450 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5451 158142c2 bellard
    bExp = extractFloat128Exp( b );
5452 158142c2 bellard
    bSign = extractFloat128Sign( b );
5453 158142c2 bellard
    zSign = aSign ^ bSign;
5454 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5455 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5456 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5457 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5458 158142c2 bellard
        }
5459 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5460 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5461 158142c2 bellard
    }
5462 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5463 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5464 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5465 158142c2 bellard
 invalid:
5466 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5467 158142c2 bellard
            z.low = float128_default_nan_low;
5468 158142c2 bellard
            z.high = float128_default_nan_high;
5469 158142c2 bellard
            return z;
5470 158142c2 bellard
        }
5471 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5472 158142c2 bellard
    }
5473 158142c2 bellard
    if ( aExp == 0 ) {
5474 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5475 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5476 158142c2 bellard
    }
5477 158142c2 bellard
    if ( bExp == 0 ) {
5478 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5479 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5480 158142c2 bellard
    }
5481 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
5482 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5483 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5484 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5485 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5486 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
5487 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5488 158142c2 bellard
        shift128ExtraRightJamming(
5489 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5490 158142c2 bellard
        ++zExp;
5491 158142c2 bellard
    }
5492 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5493 158142c2 bellard
5494 158142c2 bellard
}
5495 158142c2 bellard
5496 158142c2 bellard
/*----------------------------------------------------------------------------
5497 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5498 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5499 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5500 158142c2 bellard
*----------------------------------------------------------------------------*/
5501 158142c2 bellard
5502 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5503 158142c2 bellard
{
5504 158142c2 bellard
    flag aSign, bSign, zSign;
5505 158142c2 bellard
    int32 aExp, bExp, zExp;
5506 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5507 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5508 158142c2 bellard
    float128 z;
5509 158142c2 bellard
5510 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5511 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5512 158142c2 bellard
    aExp = extractFloat128Exp( a );
5513 158142c2 bellard
    aSign = extractFloat128Sign( a );
5514 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5515 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5516 158142c2 bellard
    bExp = extractFloat128Exp( b );
5517 158142c2 bellard
    bSign = extractFloat128Sign( b );
5518 158142c2 bellard
    zSign = aSign ^ bSign;
5519 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5520 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5521 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5522 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5523 158142c2 bellard
            goto invalid;
5524 158142c2 bellard
        }
5525 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5526 158142c2 bellard
    }
5527 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5528 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5529 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5530 158142c2 bellard
    }
5531 158142c2 bellard
    if ( bExp == 0 ) {
5532 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5533 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5534 158142c2 bellard
 invalid:
5535 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5536 158142c2 bellard
                z.low = float128_default_nan_low;
5537 158142c2 bellard
                z.high = float128_default_nan_high;
5538 158142c2 bellard
                return z;
5539 158142c2 bellard
            }
5540 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
5541 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5542 158142c2 bellard
        }
5543 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5544 158142c2 bellard
    }
5545 158142c2 bellard
    if ( aExp == 0 ) {
5546 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5547 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5548 158142c2 bellard
    }
5549 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
5550 158142c2 bellard
    shortShift128Left(
5551 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5552 158142c2 bellard
    shortShift128Left(
5553 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5554 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5555 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5556 158142c2 bellard
        ++zExp;
5557 158142c2 bellard
    }
5558 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5559 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5560 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5561 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5562 158142c2 bellard
        --zSig0;
5563 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5564 158142c2 bellard
    }
5565 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5566 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5567 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5568 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5569 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5570 158142c2 bellard
            --zSig1;
5571 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5572 158142c2 bellard
        }
5573 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5574 158142c2 bellard
    }
5575 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5576 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5577 158142c2 bellard
5578 158142c2 bellard
}
5579 158142c2 bellard
5580 158142c2 bellard
/*----------------------------------------------------------------------------
5581 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5582 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5583 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5584 158142c2 bellard
*----------------------------------------------------------------------------*/
5585 158142c2 bellard
5586 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5587 158142c2 bellard
{
5588 ed086f3d Blue Swirl
    flag aSign, zSign;
5589 158142c2 bellard
    int32 aExp, bExp, expDiff;
5590 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5591 bb98fe42 Andreas Färber
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5592 bb98fe42 Andreas Färber
    int64_t sigMean0;
5593 158142c2 bellard
    float128 z;
5594 158142c2 bellard
5595 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5596 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5597 158142c2 bellard
    aExp = extractFloat128Exp( a );
5598 158142c2 bellard
    aSign = extractFloat128Sign( a );
5599 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5600 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5601 158142c2 bellard
    bExp = extractFloat128Exp( b );
5602 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5603 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5604 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5605 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5606 158142c2 bellard
        }
5607 158142c2 bellard
        goto invalid;
5608 158142c2 bellard
    }
5609 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5610 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5611 158142c2 bellard
        return a;
5612 158142c2 bellard
    }
5613 158142c2 bellard
    if ( bExp == 0 ) {
5614 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5615 158142c2 bellard
 invalid:
5616 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5617 158142c2 bellard
            z.low = float128_default_nan_low;
5618 158142c2 bellard
            z.high = float128_default_nan_high;
5619 158142c2 bellard
            return z;
5620 158142c2 bellard
        }
5621 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5622 158142c2 bellard
    }
5623 158142c2 bellard
    if ( aExp == 0 ) {
5624 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5625 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5626 158142c2 bellard
    }
5627 158142c2 bellard
    expDiff = aExp - bExp;
5628 158142c2 bellard
    if ( expDiff < -1 ) return a;
5629 158142c2 bellard
    shortShift128Left(
5630 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
5631 158142c2 bellard
        aSig1,
5632 158142c2 bellard
        15 - ( expDiff < 0 ),
5633 158142c2 bellard
        &aSig0,
5634 158142c2 bellard
        &aSig1
5635 158142c2 bellard
    );
5636 158142c2 bellard
    shortShift128Left(
5637 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5638 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
5639 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5640 158142c2 bellard
    expDiff -= 64;
5641 158142c2 bellard
    while ( 0 < expDiff ) {
5642 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5643 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5644 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5645 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5646 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5647 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5648 158142c2 bellard
        expDiff -= 61;
5649 158142c2 bellard
    }
5650 158142c2 bellard
    if ( -64 < expDiff ) {
5651 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5652 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5653 158142c2 bellard
        q >>= - expDiff;
5654 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5655 158142c2 bellard
        expDiff += 52;
5656 158142c2 bellard
        if ( expDiff < 0 ) {
5657 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5658 158142c2 bellard
        }
5659 158142c2 bellard
        else {
5660 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5661 158142c2 bellard
        }
5662 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5663 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5664 158142c2 bellard
    }
5665 158142c2 bellard
    else {
5666 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5667 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5668 158142c2 bellard
    }
5669 158142c2 bellard
    do {
5670 158142c2 bellard
        alternateASig0 = aSig0;
5671 158142c2 bellard
        alternateASig1 = aSig1;
5672 158142c2 bellard
        ++q;
5673 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5674 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig0 );
5675 158142c2 bellard
    add128(
5676 bb98fe42 Andreas Färber
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
5677 158142c2 bellard
    if (    ( sigMean0 < 0 )
5678 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5679 158142c2 bellard
        aSig0 = alternateASig0;
5680 158142c2 bellard
        aSig1 = alternateASig1;
5681 158142c2 bellard
    }
5682 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig0 < 0 );
5683 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5684 158142c2 bellard
    return
5685 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5686 158142c2 bellard
5687 158142c2 bellard
}
5688 158142c2 bellard
5689 158142c2 bellard
/*----------------------------------------------------------------------------
5690 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5691 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5692 158142c2 bellard
| Floating-Point Arithmetic.
5693 158142c2 bellard
*----------------------------------------------------------------------------*/
5694 158142c2 bellard
5695 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5696 158142c2 bellard
{
5697 158142c2 bellard
    flag aSign;
5698 158142c2 bellard
    int32 aExp, zExp;
5699 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5700 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5701 158142c2 bellard
    float128 z;
5702 158142c2 bellard
5703 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5704 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5705 158142c2 bellard
    aExp = extractFloat128Exp( a );
5706 158142c2 bellard
    aSign = extractFloat128Sign( a );
5707 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5708 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5709 158142c2 bellard
        if ( ! aSign ) return a;
5710 158142c2 bellard
        goto invalid;
5711 158142c2 bellard
    }
5712 158142c2 bellard
    if ( aSign ) {
5713 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5714 158142c2 bellard
 invalid:
5715 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5716 158142c2 bellard
        z.low = float128_default_nan_low;
5717 158142c2 bellard
        z.high = float128_default_nan_high;
5718 158142c2 bellard
        return z;
5719 158142c2 bellard
    }
5720 158142c2 bellard
    if ( aExp == 0 ) {
5721 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5722 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5723 158142c2 bellard
    }
5724 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5725 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5726 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5727 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5728 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5729 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5730 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5731 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5732 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5733 158142c2 bellard
        --zSig0;
5734 158142c2 bellard
        doubleZSig0 -= 2;
5735 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5736 158142c2 bellard
    }
5737 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5738 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5739 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5740 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5741 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5742 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5743 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5744 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5745 158142c2 bellard
            --zSig1;
5746 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5747 158142c2 bellard
            term3 |= 1;
5748 158142c2 bellard
            term2 |= doubleZSig0;
5749 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5750 158142c2 bellard
        }
5751 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5752 158142c2 bellard
    }
5753 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5754 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5755 158142c2 bellard
5756 158142c2 bellard
}
5757 158142c2 bellard
5758 158142c2 bellard
/*----------------------------------------------------------------------------
5759 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5760 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5761 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5762 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5763 158142c2 bellard
*----------------------------------------------------------------------------*/
5764 158142c2 bellard
5765 b689362d Aurelien Jarno
int float128_eq( float128 a, float128 b STATUS_PARAM )
5766 158142c2 bellard
{
5767 158142c2 bellard
5768 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5769 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5770 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5771 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5772 158142c2 bellard
       ) {
5773 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5774 158142c2 bellard
        return 0;
5775 158142c2 bellard
    }
5776 158142c2 bellard
    return
5777 158142c2 bellard
           ( a.low == b.low )
5778 158142c2 bellard
        && (    ( a.high == b.high )
5779 158142c2 bellard
             || (    ( a.low == 0 )
5780 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5781 158142c2 bellard
           );
5782 158142c2 bellard
5783 158142c2 bellard
}
5784 158142c2 bellard
5785 158142c2 bellard
/*----------------------------------------------------------------------------
5786 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5787 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
5788 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
5789 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5790 158142c2 bellard
*----------------------------------------------------------------------------*/
5791 158142c2 bellard
5792 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5793 158142c2 bellard
{
5794 158142c2 bellard
    flag aSign, bSign;
5795 158142c2 bellard
5796 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5797 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5798 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5799 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5800 158142c2 bellard
       ) {
5801 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5802 158142c2 bellard
        return 0;
5803 158142c2 bellard
    }
5804 158142c2 bellard
    aSign = extractFloat128Sign( a );
5805 158142c2 bellard
    bSign = extractFloat128Sign( b );
5806 158142c2 bellard
    if ( aSign != bSign ) {
5807 158142c2 bellard
        return
5808 158142c2 bellard
               aSign
5809 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5810 158142c2 bellard
                 == 0 );
5811 158142c2 bellard
    }
5812 158142c2 bellard
    return
5813 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5814 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5815 158142c2 bellard
5816 158142c2 bellard
}
5817 158142c2 bellard
5818 158142c2 bellard
/*----------------------------------------------------------------------------
5819 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5820 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5821 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
5822 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5823 158142c2 bellard
*----------------------------------------------------------------------------*/
5824 158142c2 bellard
5825 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5826 158142c2 bellard
{
5827 158142c2 bellard
    flag aSign, bSign;
5828 158142c2 bellard
5829 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5830 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5831 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5832 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5833 158142c2 bellard
       ) {
5834 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5835 158142c2 bellard
        return 0;
5836 158142c2 bellard
    }
5837 158142c2 bellard
    aSign = extractFloat128Sign( a );
5838 158142c2 bellard
    bSign = extractFloat128Sign( b );
5839 158142c2 bellard
    if ( aSign != bSign ) {
5840 158142c2 bellard
        return
5841 158142c2 bellard
               aSign
5842 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5843 158142c2 bellard
                 != 0 );
5844 158142c2 bellard
    }
5845 158142c2 bellard
    return
5846 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5847 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5848 158142c2 bellard
5849 158142c2 bellard
}
5850 158142c2 bellard
5851 158142c2 bellard
/*----------------------------------------------------------------------------
5852 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5853 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
5854 f5a64251 Aurelien Jarno
| operand is a NaN. The comparison is performed according to the IEC/IEEE
5855 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
5856 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5857 67b7861d Aurelien Jarno
5858 67b7861d Aurelien Jarno
int float128_unordered( float128 a, float128 b STATUS_PARAM )
5859 67b7861d Aurelien Jarno
{
5860 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5861 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5862 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5863 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5864 67b7861d Aurelien Jarno
       ) {
5865 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5866 67b7861d Aurelien Jarno
        return 1;
5867 67b7861d Aurelien Jarno
    }
5868 67b7861d Aurelien Jarno
    return 0;
5869 67b7861d Aurelien Jarno
}
5870 67b7861d Aurelien Jarno
5871 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5872 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5873 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5874 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
5875 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
5876 158142c2 bellard
*----------------------------------------------------------------------------*/
5877 158142c2 bellard
5878 b689362d Aurelien Jarno
int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
5879 158142c2 bellard
{
5880 158142c2 bellard
5881 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5882 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5883 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5884 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5885 158142c2 bellard
       ) {
5886 b689362d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
5887 b689362d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
5888 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5889 b689362d Aurelien Jarno
        }
5890 158142c2 bellard
        return 0;
5891 158142c2 bellard
    }
5892 158142c2 bellard
    return
5893 158142c2 bellard
           ( a.low == b.low )
5894 158142c2 bellard
        && (    ( a.high == b.high )
5895 158142c2 bellard
             || (    ( a.low == 0 )
5896 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5897 158142c2 bellard
           );
5898 158142c2 bellard
5899 158142c2 bellard
}
5900 158142c2 bellard
5901 158142c2 bellard
/*----------------------------------------------------------------------------
5902 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5903 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5904 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5905 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5906 158142c2 bellard
*----------------------------------------------------------------------------*/
5907 158142c2 bellard
5908 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5909 158142c2 bellard
{
5910 158142c2 bellard
    flag aSign, bSign;
5911 158142c2 bellard
5912 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5913 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5914 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5915 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5916 158142c2 bellard
       ) {
5917 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5918 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5919 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5920 158142c2 bellard
        }
5921 158142c2 bellard
        return 0;
5922 158142c2 bellard
    }
5923 158142c2 bellard
    aSign = extractFloat128Sign( a );
5924 158142c2 bellard
    bSign = extractFloat128Sign( b );
5925 158142c2 bellard
    if ( aSign != bSign ) {
5926 158142c2 bellard
        return
5927 158142c2 bellard
               aSign
5928 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5929 158142c2 bellard
                 == 0 );
5930 158142c2 bellard
    }
5931 158142c2 bellard
    return
5932 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5933 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5934 158142c2 bellard
5935 158142c2 bellard
}
5936 158142c2 bellard
5937 158142c2 bellard
/*----------------------------------------------------------------------------
5938 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5939 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5940 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5941 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5942 158142c2 bellard
*----------------------------------------------------------------------------*/
5943 158142c2 bellard
5944 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5945 158142c2 bellard
{
5946 158142c2 bellard
    flag aSign, bSign;
5947 158142c2 bellard
5948 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5949 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5950 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5951 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5952 158142c2 bellard
       ) {
5953 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5954 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5955 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5956 158142c2 bellard
        }
5957 158142c2 bellard
        return 0;
5958 158142c2 bellard
    }
5959 158142c2 bellard
    aSign = extractFloat128Sign( a );
5960 158142c2 bellard
    bSign = extractFloat128Sign( b );
5961 158142c2 bellard
    if ( aSign != bSign ) {
5962 158142c2 bellard
        return
5963 158142c2 bellard
               aSign
5964 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5965 158142c2 bellard
                 != 0 );
5966 158142c2 bellard
    }
5967 158142c2 bellard
    return
5968 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5969 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5970 158142c2 bellard
5971 158142c2 bellard
}
5972 158142c2 bellard
5973 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5974 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5975 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
5976 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
5977 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
5978 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5979 67b7861d Aurelien Jarno
5980 67b7861d Aurelien Jarno
int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
5981 67b7861d Aurelien Jarno
{
5982 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5983 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5984 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5985 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5986 67b7861d Aurelien Jarno
       ) {
5987 67b7861d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
5988 67b7861d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
5989 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5990 67b7861d Aurelien Jarno
        }
5991 67b7861d Aurelien Jarno
        return 1;
5992 67b7861d Aurelien Jarno
    }
5993 67b7861d Aurelien Jarno
    return 0;
5994 67b7861d Aurelien Jarno
}
5995 67b7861d Aurelien Jarno
5996 158142c2 bellard
#endif
5997 158142c2 bellard
5998 1d6bda35 bellard
/* misc functions */
5999 1d6bda35 bellard
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    /* Delegate to the 64-bit integer conversion routine. */
    float32 converted = int64_to_float32(a STATUS_VAR);

    return converted;
}
6003 1d6bda35 bellard
6004 1d6bda35 bellard
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    /* Delegate to the 64-bit integer conversion routine. */
    float64 converted = int64_to_float64(a STATUS_VAR);

    return converted;
}
6008 1d6bda35 bellard
6009 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of float32_to_int64().
 * Results below 0 or above 0xffffffff are clamped to that bound and the
 * invalid exception is raised. */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6026 1d6bda35 bellard
6027 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of
 * float32_to_int64_round_to_zero().  Results below 0 or above 0xffffffff are
 * clamped to that bound and the invalid exception is raised. */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6044 1d6bda35 bellard
6045 cbcef455 Peter Maydell
/* Converts `a' to an unsigned 16-bit range by way of
 * float32_to_int64_round_to_zero().  Results below 0 or above 0xffff are
 * clamped to that bound and the invalid exception is raised. */
unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffff;
    }
    return wide;
}
6062 cbcef455 Peter Maydell
6063 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of float64_to_int64().
 * Results below 0 or above 0xffffffff are clamped to that bound and the
 * invalid exception is raised. */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6080 1d6bda35 bellard
6081 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of
 * float64_to_int64_round_to_zero().  Results below 0 or above 0xffffffff are
 * clamped to that bound and the invalid exception is raised. */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6098 1d6bda35 bellard
6099 cbcef455 Peter Maydell
/* Converts `a' to an unsigned 16-bit range by way of
 * float64_to_int64_round_to_zero().  Results below 0 or above 0xffff are
 * clamped to that bound and the invalid exception is raised. */
unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffff;
    }
    return wide;
}
6116 cbcef455 Peter Maydell
6117 f090c9d4 pbrook
/* FIXME: This looks broken.  */
6118 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6119 75d62a58 j_mayer
{
6120 75d62a58 j_mayer
    int64_t v;
6121 75d62a58 j_mayer
6122 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6123 f090c9d4 pbrook
    v += float64_val(a);
6124 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
6125 75d62a58 j_mayer
6126 75d62a58 j_mayer
    return v - INT64_MIN;
6127 75d62a58 j_mayer
}
6128 75d62a58 j_mayer
6129 75d62a58 j_mayer
/* Converts the double-precision value `a' to an unsigned 64-bit integer,
 * rounding toward zero (as applied by float64_to_int64_round_to_zero()).
 *
 * The previous implementation added the raw IEEE bit patterns of `a' and of
 * (double)INT64_MIN as integers, which is not a floating-point addition and
 * produced wrong results for ordinary inputs such as 1.0.
 *
 * The range is split at 2^63 instead:
 *  - a < 2^63: the signed conversion already yields the right value; a
 *    negative integer result is clamped to 0 and raises the invalid exception,
 *    mirroring float64_to_uint32_round_to_zero() in this file.
 *  - otherwise: `a' has no fractional bits, so subtracting 2^63 is exact; the
 *    difference is converted as a signed value and 2^63 is added back in
 *    unsigned arithmetic.
 * NOTE(review): values >= 2^64, infinities and NaNs rely on the signed
 * conversion saturating to INT64_MAX and raising invalid, which would make
 * this function return UINT64_MAX -- confirm against that routine. */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    /* 2^63 as a double: biased exponent 0x43E (1023 + 63), zero fraction. */
    float64 two63 = make_float64( LIT64( 0x43E0000000000000 ) );
    int64_t v;

    if ( float64_lt( a, two63 STATUS_VAR ) ) {
        v = float64_to_int64_round_to_zero( a STATUS_VAR );
        if ( v < 0 ) {
            float_raise( float_flag_invalid STATUS_VAR);
            return 0;
        }
        return v;
    }

    v = float64_to_int64_round_to_zero( float64_sub( a, two63 STATUS_VAR )
                                        STATUS_VAR );
    /* Unsigned addition avoids signed overflow when re-applying the offset. */
    return (uint64_t) v + ( (uint64_t) 1 << 63 );
}
6139 75d62a58 j_mayer
6140 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
6141 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
6142 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
6143 1d6bda35 bellard
{                                                                            \
6144 1d6bda35 bellard
    flag aSign, bSign;                                                       \
6145 bb98fe42 Andreas Färber
    uint ## s ## _t av, bv;                                                  \
6146 37d18660 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
6147 37d18660 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
6148 1d6bda35 bellard
                                                                             \
6149 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
6150 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
6151 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
6152 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
6153 1d6bda35 bellard
        if (!is_quiet ||                                                     \
6154 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
6155 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
6156 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
6157 1d6bda35 bellard
        }                                                                    \
6158 1d6bda35 bellard
        return float_relation_unordered;                                     \
6159 1d6bda35 bellard
    }                                                                        \
6160 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
6161 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
6162 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
6163 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
6164 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
6165 bb98fe42 Andreas Färber
        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
6166 1d6bda35 bellard
            /* zero case */                                                  \
6167 1d6bda35 bellard
            return float_relation_equal;                                     \
6168 1d6bda35 bellard
        } else {                                                             \
6169 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
6170 1d6bda35 bellard
        }                                                                    \
6171 1d6bda35 bellard
    } else {                                                                 \
6172 f090c9d4 pbrook
        if (av == bv) {                                                      \
6173 1d6bda35 bellard
            return float_relation_equal;                                     \
6174 1d6bda35 bellard
        } else {                                                             \
6175 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
6176 1d6bda35 bellard
        }                                                                    \
6177 1d6bda35 bellard
    }                                                                        \
6178 1d6bda35 bellard
}                                                                            \
6179 1d6bda35 bellard
                                                                             \
6180 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
6181 1d6bda35 bellard
{                                                                            \
6182 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
6183 1d6bda35 bellard
}                                                                            \
6184 1d6bda35 bellard
                                                                             \
6185 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
6186 1d6bda35 bellard
{                                                                            \
6187 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
6188 1d6bda35 bellard
}
6189 1d6bda35 bellard
6190 1d6bda35 bellard
COMPARE(32, 0xff)
6191 1d6bda35 bellard
COMPARE(64, 0x7ff)
6192 9ee6e8bb pbrook
6193 f6714d36 Aurelien Jarno
INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6194 f6714d36 Aurelien Jarno
                                      int is_quiet STATUS_PARAM )
6195 f6714d36 Aurelien Jarno
{
6196 f6714d36 Aurelien Jarno
    flag aSign, bSign;
6197 f6714d36 Aurelien Jarno
6198 f6714d36 Aurelien Jarno
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6199 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( a )<<1 ) ) ||
6200 f6714d36 Aurelien Jarno
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6201 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( b )<<1 ) )) {
6202 f6714d36 Aurelien Jarno
        if (!is_quiet ||
6203 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( a ) ||
6204 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( b ) ) {
6205 f6714d36 Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6206 f6714d36 Aurelien Jarno
        }
6207 f6714d36 Aurelien Jarno
        return float_relation_unordered;
6208 f6714d36 Aurelien Jarno
    }
6209 f6714d36 Aurelien Jarno
    aSign = extractFloatx80Sign( a );
6210 f6714d36 Aurelien Jarno
    bSign = extractFloatx80Sign( b );
6211 f6714d36 Aurelien Jarno
    if ( aSign != bSign ) {
6212 f6714d36 Aurelien Jarno
6213 f6714d36 Aurelien Jarno
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6214 f6714d36 Aurelien Jarno
             ( ( a.low | b.low ) == 0 ) ) {
6215 f6714d36 Aurelien Jarno
            /* zero case */
6216 f6714d36 Aurelien Jarno
            return float_relation_equal;
6217 f6714d36 Aurelien Jarno
        } else {
6218 f6714d36 Aurelien Jarno
            return 1 - (2 * aSign);
6219 f6714d36 Aurelien Jarno
        }
6220 f6714d36 Aurelien Jarno
    } else {
6221 f6714d36 Aurelien Jarno
        if (a.low == b.low && a.high == b.high) {
6222 f6714d36 Aurelien Jarno
            return float_relation_equal;
6223 f6714d36 Aurelien Jarno
        } else {
6224 f6714d36 Aurelien Jarno
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6225 f6714d36 Aurelien Jarno
        }
6226 f6714d36 Aurelien Jarno
    }
6227 f6714d36 Aurelien Jarno
}
6228 f6714d36 Aurelien Jarno
6229 f6714d36 Aurelien Jarno
/* Signaling three-way comparison: any NaN operand raises invalid. */
int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation = floatx80_compare_internal(a, b, 0 STATUS_VAR);

    return relation;
}
6233 f6714d36 Aurelien Jarno
6234 f6714d36 Aurelien Jarno
/* Quiet three-way comparison: only signaling NaNs raise invalid. */
int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation = floatx80_compare_internal(a, b, 1 STATUS_VAR);

    return relation;
}
6238 f6714d36 Aurelien Jarno
6239 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
6240 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
6241 1f587329 blueswir1
{
6242 1f587329 blueswir1
    flag aSign, bSign;
6243 1f587329 blueswir1
6244 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6245 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6246 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6247 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6248 1f587329 blueswir1
        if (!is_quiet ||
6249 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
6250 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
6251 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
6252 1f587329 blueswir1
        }
6253 1f587329 blueswir1
        return float_relation_unordered;
6254 1f587329 blueswir1
    }
6255 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
6256 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
6257 1f587329 blueswir1
    if ( aSign != bSign ) {
6258 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6259 1f587329 blueswir1
            /* zero case */
6260 1f587329 blueswir1
            return float_relation_equal;
6261 1f587329 blueswir1
        } else {
6262 1f587329 blueswir1
            return 1 - (2 * aSign);
6263 1f587329 blueswir1
        }
6264 1f587329 blueswir1
    } else {
6265 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
6266 1f587329 blueswir1
            return float_relation_equal;
6267 1f587329 blueswir1
        } else {
6268 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6269 1f587329 blueswir1
        }
6270 1f587329 blueswir1
    }
6271 1f587329 blueswir1
}
6272 1f587329 blueswir1
6273 1f587329 blueswir1
/* Signaling three-way comparison: any NaN operand raises invalid. */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    int relation = float128_compare_internal(a, b, 0 STATUS_VAR);

    return relation;
}
6277 1f587329 blueswir1
6278 1f587329 blueswir1
/* Quiet three-way comparison: only signaling NaNs raise invalid. */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    int relation = float128_compare_internal(a, b, 1 STATUS_VAR);

    return relation;
}
6282 1f587329 blueswir1
6283 274f1b04 Peter Maydell
/* min() and max() functions. These can't be implemented as
6284 274f1b04 Peter Maydell
 * 'compare and pick one input' because that would mishandle
6285 274f1b04 Peter Maydell
 * NaNs and +0 vs -0.
6286 274f1b04 Peter Maydell
 */
6287 274f1b04 Peter Maydell
#define MINMAX(s, nan_exp)                                              \
6288 274f1b04 Peter Maydell
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
6289 274f1b04 Peter Maydell
                                        int ismin STATUS_PARAM )        \
6290 274f1b04 Peter Maydell
{                                                                       \
6291 274f1b04 Peter Maydell
    flag aSign, bSign;                                                  \
6292 274f1b04 Peter Maydell
    uint ## s ## _t av, bv;                                             \
6293 274f1b04 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
6294 274f1b04 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
6295 274f1b04 Peter Maydell
    if (float ## s ## _is_any_nan(a) ||                                 \
6296 274f1b04 Peter Maydell
        float ## s ## _is_any_nan(b)) {                                 \
6297 274f1b04 Peter Maydell
        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
6298 274f1b04 Peter Maydell
    }                                                                   \
6299 274f1b04 Peter Maydell
    aSign = extractFloat ## s ## Sign(a);                               \
6300 274f1b04 Peter Maydell
    bSign = extractFloat ## s ## Sign(b);                               \
6301 274f1b04 Peter Maydell
    av = float ## s ## _val(a);                                         \
6302 274f1b04 Peter Maydell
    bv = float ## s ## _val(b);                                         \
6303 274f1b04 Peter Maydell
    if (aSign != bSign) {                                               \
6304 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6305 274f1b04 Peter Maydell
            return aSign ? a : b;                                       \
6306 274f1b04 Peter Maydell
        } else {                                                        \
6307 274f1b04 Peter Maydell
            return aSign ? b : a;                                       \
6308 274f1b04 Peter Maydell
        }                                                               \
6309 274f1b04 Peter Maydell
    } else {                                                            \
6310 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6311 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? a : b;                         \
6312 274f1b04 Peter Maydell
        } else {                                                        \
6313 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? b : a;                         \
6314 274f1b04 Peter Maydell
        }                                                               \
6315 274f1b04 Peter Maydell
    }                                                                   \
6316 274f1b04 Peter Maydell
}                                                                       \
6317 274f1b04 Peter Maydell
                                                                        \
6318 274f1b04 Peter Maydell
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
6319 274f1b04 Peter Maydell
{                                                                       \
6320 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
6321 274f1b04 Peter Maydell
}                                                                       \
6322 274f1b04 Peter Maydell
                                                                        \
6323 274f1b04 Peter Maydell
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
6324 274f1b04 Peter Maydell
{                                                                       \
6325 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
6326 274f1b04 Peter Maydell
}
6327 274f1b04 Peter Maydell
6328 274f1b04 Peter Maydell
MINMAX(32, 0xff)
6329 274f1b04 Peter Maydell
MINMAX(64, 0x7ff)
6330 274f1b04 Peter Maydell
6331 274f1b04 Peter Maydell
6332 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
6333 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6334 9ee6e8bb pbrook
{
6335 9ee6e8bb pbrook
    flag aSign;
6336 326b9e98 Aurelien Jarno
    int16_t aExp;
6337 bb98fe42 Andreas Färber
    uint32_t aSig;
6338 9ee6e8bb pbrook
6339 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
6340 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
6341 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
6342 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
6343 9ee6e8bb pbrook
6344 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
6345 326b9e98 Aurelien Jarno
        if ( aSig ) {
6346 326b9e98 Aurelien Jarno
            return propagateFloat32NaN( a, a STATUS_VAR );
6347 326b9e98 Aurelien Jarno
        }
6348 9ee6e8bb pbrook
        return a;
6349 9ee6e8bb pbrook
    }
6350 69397542 pbrook
    if ( aExp != 0 )
6351 69397542 pbrook
        aSig |= 0x00800000;
6352 69397542 pbrook
    else if ( aSig == 0 )
6353 69397542 pbrook
        return a;
6354 69397542 pbrook
6355 326b9e98 Aurelien Jarno
    if (n > 0x200) {
6356 326b9e98 Aurelien Jarno
        n = 0x200;
6357 326b9e98 Aurelien Jarno
    } else if (n < -0x200) {
6358 326b9e98 Aurelien Jarno
        n = -0x200;
6359 326b9e98 Aurelien Jarno
    }
6360 326b9e98 Aurelien Jarno
6361 69397542 pbrook
    aExp += n - 1;
6362 69397542 pbrook
    aSig <<= 7;
6363 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
6364 9ee6e8bb pbrook
}
6365 9ee6e8bb pbrook
6366 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (double precision).
 *
 * NaN operands go through propagateFloat64NaN(); infinities and zeros are
 * returned unchanged.  N is clamped to [-0x1000, 0x1000] before it is added
 * to the 16-bit exponent; the result is then normalized, rounded and packed
 * by normalizeRoundAndPackFloat64().
 */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16_t aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        if ( aSig ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit leading one explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        return a;
    } else {
        /* Denormal: there is no implicit bit and the effective biased
         * exponent is 1, not 0.  Without this adjustment the result was
         * half the correct value (even for n == 0). */
        aExp++;
    }

    if (n > 0x1000) {
        n = 0x1000;
    } else if (n < -0x1000) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
6398 9ee6e8bb pbrook
6399 9ee6e8bb pbrook
#ifdef FLOATX80
6400 9ee6e8bb pbrook
/*----------------------------------------------------------------------------
| Returns the extended double-precision floating-point value `a' scaled by 2
| raised to the power `n'.  NaN inputs are passed through
| propagateFloatx80NaN(); infinities and zeros are returned unchanged.  `n'
| is clamped to [-0x10000, 0x10000] before being added to the exponent; the
| final rounding and packing is delegated to normalizeRoundAndPackFloatx80()
| using the current floatx80 rounding precision.
*----------------------------------------------------------------------------*/
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* The shift discards the explicit integer bit so that only the
         * fraction bits decide between NaN and infinity. */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Signed zero scales to itself. */
            return a;
        }
        /* Subnormal (or pseudo-denormal): an exponent field of 0 carries
         * the same weight as an exponent field of 1.  Without this
         * adjustment the result would be too small by a factor of two. */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
6430 9ee6e8bb pbrook
#endif
6431 9ee6e8bb pbrook
6432 9ee6e8bb pbrook
#ifdef FLOAT128
6433 9ee6e8bb pbrook
/*----------------------------------------------------------------------------
| Returns the quadruple-precision floating-point value `a' scaled by 2 raised
| to the power `n'.  NaN inputs are passed through propagateFloat128NaN();
| infinities and zeros are returned unchanged.  `n' is clamped to
| [-0x10000, 0x10000] before being added to the exponent; the final rounding
| and packing is delegated to normalizeRoundAndPackFloat128().
*----------------------------------------------------------------------------*/
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        /* Signed zero scales to itself. */
        return a;
    } else {
        /* Subnormal: there is no implicit bit, and the fraction carries the
         * same weight as for exponent field 1.  Without this adjustment the
         * result would be too small by a factor of two. */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The "- 1" matches the exponent convention of
     * normalizeRoundAndPackFloat128(). */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}
6465 9ee6e8bb pbrook
#endif