Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ 7fee199c

History | View | Annotate | Download (228.5 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10 158142c2 bellard
Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 158142c2 bellard
#include "softfloat.h"
39 158142c2 bellard
40 158142c2 bellard
/*----------------------------------------------------------------------------
41 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
42 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
43 158142c2 bellard
| desired.)
44 158142c2 bellard
*----------------------------------------------------------------------------*/
45 158142c2 bellard
#include "softfloat-macros.h"
46 158142c2 bellard
47 158142c2 bellard
/*----------------------------------------------------------------------------
48 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
49 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
50 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
51 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
53 158142c2 bellard
| specific.
54 158142c2 bellard
*----------------------------------------------------------------------------*/
55 158142c2 bellard
#include "softfloat-specialize.h"
56 158142c2 bellard
57 158142c2 bellard
void set_float_rounding_mode(int val STATUS_PARAM)
{
    /* Store the caller-supplied value in the float_rounding_mode field
     * reached through STATUS(). */
    STATUS(float_rounding_mode) = val;
}
61 158142c2 bellard
62 1d6bda35 bellard
void set_float_exception_flags(int val STATUS_PARAM)
{
    /* Overwrite (not OR into) the float_exception_flags field reached
     * through STATUS() with the caller-supplied value. */
    STATUS(float_exception_flags) = val;
}
66 1d6bda35 bellard
67 158142c2 bellard
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    /* Store the caller-supplied value in the floatx80_rounding_precision
     * field reached through STATUS(). */
    STATUS(floatx80_rounding_precision) = val;
}
71 158142c2 bellard
72 158142c2 bellard
/*----------------------------------------------------------------------------
73 bb4d4bb3 Peter Maydell
| Returns the fraction bits of the half-precision floating-point value `a'.
74 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
75 bb4d4bb3 Peter Maydell
76 bb4d4bb3 Peter Maydell
INLINE uint32_t extractFloat16Frac(float16 a)
{
    /* The fraction is the low 10 bits of the raw value. */
    const int fracMask = 0x3ff;

    return float16_val(a) & fracMask;
}
80 bb4d4bb3 Peter Maydell
81 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
82 bb4d4bb3 Peter Maydell
| Returns the exponent bits of the half-precision floating-point value `a'.
83 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
84 bb4d4bb3 Peter Maydell
85 bb4d4bb3 Peter Maydell
INLINE int16 extractFloat16Exp(float16 a)
{
    /* Drop the 10 fraction bits, then keep the 5 bits that follow. */
    const int expMask = 0x1f;

    return (float16_val(a) >> 10) & expMask;
}
89 bb4d4bb3 Peter Maydell
90 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
91 bb4d4bb3 Peter Maydell
| Returns the sign bit of the half-precision floating-point value `a'.
92 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
93 bb4d4bb3 Peter Maydell
94 bb4d4bb3 Peter Maydell
INLINE flag extractFloat16Sign(float16 a)
{
    /* Shift the raw value right by 15 so that bit 15 becomes the result. */
    return float16_val(a) >> 15;
}
98 bb4d4bb3 Peter Maydell
99 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
100 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
101 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
102 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
103 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
104 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
105 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
106 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
107 158142c2 bellard
| positive or negative integer is returned.
108 158142c2 bellard
*----------------------------------------------------------------------------*/
109 158142c2 bellard
110 bb98fe42 Andreas Färber
static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
{
    int8 roundingMode;
    flag roundNearestEven;
    int8 roundIncrement, roundBits;
    int32 z;

    roundingMode = STATUS(float_rounding_mode);
    roundNearestEven = ( roundingMode == float_round_nearest_even );

    /* Choose the amount added below the binary point (7 fraction bits):
     * half an ulp for nearest-even, nothing when the mode truncates the
     * magnitude, and just under one ulp when it rounds the magnitude up. */
    if ( roundNearestEven ) {
        roundIncrement = 0x40;
    }
    else if ( roundingMode == float_round_to_zero ) {
        roundIncrement = 0;
    }
    else if ( zSign ? ( roundingMode == float_round_up )
                    : ( roundingMode == float_round_down ) ) {
        roundIncrement = 0;
    }
    else {
        roundIncrement = 0x7F;
    }

    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* An exact tie in nearest-even mode must land on an even integer. */
    if ( ( roundBits == 0x40 ) && roundNearestEven ) {
        absZ &= ~ (uint64_t) 1;
    }

    /* Negate in unsigned arithmetic: `- z' on a signed int32 is undefined
     * behaviour when the magnitude is 0x80000000, which is a legitimate
     * input here (it encodes the representable result -2^31). */
    z = (int32) ( zSign ? 0 - (uint32_t) absZ : (uint32_t) absZ );

    /* Out of range if the magnitude needs more than 32 bits, or if the
     * sign of the packed result disagrees with the requested sign. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;

}
147 158142c2 bellard
148 158142c2 bellard
/*----------------------------------------------------------------------------
149 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
150 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
151 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
152 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
153 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
154 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
155 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
156 158142c2 bellard
| exception is raised and the largest positive or negative integer is
157 158142c2 bellard
| returned.
158 158142c2 bellard
*----------------------------------------------------------------------------*/
159 158142c2 bellard
160 bb98fe42 Andreas Färber
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
{
    int8 roundingMode;
    flag roundNearestEven, increment;
    int64 z;

    roundingMode = STATUS(float_rounding_mode);
    roundNearestEven = ( roundingMode == float_round_nearest_even );

    /* Decide whether the integer part (absZ0) is bumped by one.  absZ1
     * holds the fraction; its top bit set means "at least one half". */
    if ( roundNearestEven ) {
        increment = ( (int64_t) absZ1 < 0 );
    }
    else if ( roundingMode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ) {
        increment = ( roundingMode == float_round_down ) && absZ1;
    }
    else {
        increment = ( roundingMode == float_round_up ) && absZ1;
    }

    if ( increment ) {
        ++absZ0;
        /* Wrapping to zero means the magnitude no longer fits in 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Fraction was exactly one half in nearest-even mode: force the
         * result to be even. */
        if ( ( (uint64_t) ( absZ1<<1 ) == 0 ) && roundNearestEven ) {
            absZ0 &= ~ (uint64_t) 1;
        }
    }

    /* Negate in unsigned arithmetic: `- z' on a signed int64 is undefined
     * behaviour when the magnitude is 2^63, which is a legitimate input
     * here (it encodes the representable result -2^63). */
    z = (int64) ( zSign ? 0 - absZ0 : absZ0 );

    /* A non-zero result whose sign disagrees with zSign did not fit. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise( float_flag_invalid STATUS_VAR);
        return
              zSign ? (int64_t) LIT64( 0x8000000000000000 )
            : LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;

}
200 158142c2 bellard
201 158142c2 bellard
/*----------------------------------------------------------------------------
202 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
203 158142c2 bellard
*----------------------------------------------------------------------------*/
204 158142c2 bellard
205 bb98fe42 Andreas Färber
INLINE uint32_t extractFloat32Frac( float32 a )
{
    /* The fraction is the low 23 bits of the raw value. */
    const int fracMask = 0x007FFFFF;

    return float32_val(a) & fracMask;
}
211 158142c2 bellard
212 158142c2 bellard
/*----------------------------------------------------------------------------
213 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
214 158142c2 bellard
*----------------------------------------------------------------------------*/
215 158142c2 bellard
216 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
{
    /* Drop the 23 fraction bits, then keep the 8 bits that follow. */
    const int expMask = 0xFF;

    return ( float32_val(a)>>23 ) & expMask;
}
222 158142c2 bellard
223 158142c2 bellard
/*----------------------------------------------------------------------------
224 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
225 158142c2 bellard
*----------------------------------------------------------------------------*/
226 158142c2 bellard
227 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
{
    /* Shift the raw value right by 31 so that bit 31 becomes the result. */
    return float32_val(a) >> 31;
}
233 158142c2 bellard
234 158142c2 bellard
/*----------------------------------------------------------------------------
235 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
236 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
237 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
238 37d18660 Peter Maydell
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
{
    /* Inputs pass through untouched unless input flushing is enabled. */
    if (!STATUS(flush_inputs_to_zero)) {
        return a;
    }

    /* Only a zero exponent field with a non-zero fraction is squashed. */
    if (extractFloat32Exp(a) != 0 || extractFloat32Frac(a) == 0) {
        return a;
    }

    float_raise(float_flag_input_denormal STATUS_VAR);
    /* Keep only the sign bit, giving a zero of the same sign. */
    return make_float32(float32_val(a) & 0x80000000);
}
248 37d18660 Peter Maydell
249 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
250 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
251 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
252 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
253 158142c2 bellard
| `zSigPtr', respectively.
254 158142c2 bellard
*----------------------------------------------------------------------------*/
255 158142c2 bellard
256 158142c2 bellard
static void
257 bb98fe42 Andreas Färber
 normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
258 158142c2 bellard
{
259 158142c2 bellard
    int8 shiftCount;
260 158142c2 bellard
261 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
262 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
263 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
264 158142c2 bellard
265 158142c2 bellard
}
266 158142c2 bellard
267 158142c2 bellard
/*----------------------------------------------------------------------------
268 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
269 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
270 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
271 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
272 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
273 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
274 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
275 158142c2 bellard
| significand.
276 158142c2 bellard
*----------------------------------------------------------------------------*/
277 158142c2 bellard
278 bb98fe42 Andreas Färber
INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
{
    /* Position each field, then add (not OR) so that any carry out of
     * zSig propagates into the exponent field. */
    uint32_t signField = ( (uint32_t) zSign ) << 31;
    uint32_t expField = ( (uint32_t) zExp ) << 23;

    return make_float32( signField + expField + zSig );
}
285 158142c2 bellard
286 158142c2 bellard
/*----------------------------------------------------------------------------
287 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
288 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
289 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
290 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
291 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
292 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
293 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
294 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
295 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
296 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
297 158142c2 bellard
| precision floating-point number.
298 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
299 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
300 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
301 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
302 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
303 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
304 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
305 158142c2 bellard
| Binary Floating-Point Arithmetic.
306 158142c2 bellard
*----------------------------------------------------------------------------*/
307 158142c2 bellard
308 bb98fe42 Andreas Färber
static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
{
    int8 roundingMode;
    flag roundNearestEven;
    int8 roundIncrement, roundBits;
    flag isTiny;

    roundingMode = STATUS(float_rounding_mode);
    roundNearestEven = ( roundingMode == float_round_nearest_even );

    /* Amount added to the 7 guard bits before they are shifted out:
     * half an ulp for nearest-even, zero when the mode truncates the
     * magnitude, and just under one ulp when it rounds the magnitude up. */
    if ( roundNearestEven ) {
        roundIncrement = 0x40;
    }
    else if ( roundingMode == float_round_to_zero ) {
        roundIncrement = 0;
    }
    else if ( zSign ? ( roundingMode == float_round_up )
                    : ( roundingMode == float_round_down ) ) {
        roundIncrement = 0;
    }
    else {
        roundIncrement = 0x7F;
    }

    roundBits = zSig & 0x7F;

    /* The unsigned cast makes negative exponents compare as large values,
     * so this single test catches both the overflow and underflow ends. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* All-ones exponent plus a significand of 0 (no increment in
             * effect: -1 wraps to the value just below) or of 0. */
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
        }
        if ( zExp < 0 ) {
            if (STATUS(flush_to_zero)) {
                float_raise(float_flag_output_denormal STATUS_VAR);
                return packFloat32(zSign, 0, 0);
            }
            isTiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + roundIncrement < 0x80000000 );
            /* Denormalize: shift right by -zExp and recompute the guard
             * bits from the shifted significand. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if ( isTiny && roundBits ) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }

    if ( roundBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* An exact tie in nearest-even mode must land on an even significand. */
    if ( ( roundBits == 0x40 ) && roundNearestEven ) {
        zSig &= ~ (uint32_t) 1;
    }
    if ( zSig == 0 ) {
        zExp = 0;
    }
    return packFloat32( zSign, zExp, zSig );

}
363 158142c2 bellard
364 158142c2 bellard
/*----------------------------------------------------------------------------
365 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
366 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
367 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
368 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
369 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
370 158142c2 bellard
| floating-point exponent.
371 158142c2 bellard
*----------------------------------------------------------------------------*/
372 158142c2 bellard
373 158142c2 bellard
static float32
374 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
375 158142c2 bellard
{
376 158142c2 bellard
    int8 shiftCount;
377 158142c2 bellard
378 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
379 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
380 158142c2 bellard
381 158142c2 bellard
}
382 158142c2 bellard
383 158142c2 bellard
/*----------------------------------------------------------------------------
384 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
385 158142c2 bellard
*----------------------------------------------------------------------------*/
386 158142c2 bellard
387 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat64Frac( float64 a )
{
    /* The fraction is the low 52 bits of the raw value. */
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
}
393 158142c2 bellard
394 158142c2 bellard
/*----------------------------------------------------------------------------
395 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
396 158142c2 bellard
*----------------------------------------------------------------------------*/
397 158142c2 bellard
398 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
{
    /* Drop the 52 fraction bits, then keep the 11 bits that follow. */
    const int expMask = 0x7FF;

    return ( float64_val(a)>>52 ) & expMask;
}
404 158142c2 bellard
405 158142c2 bellard
/*----------------------------------------------------------------------------
406 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
407 158142c2 bellard
*----------------------------------------------------------------------------*/
408 158142c2 bellard
409 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
{
    /* Shift the raw value right by 63 so that bit 63 becomes the result. */
    return float64_val(a) >> 63;
}
415 158142c2 bellard
416 158142c2 bellard
/*----------------------------------------------------------------------------
417 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
418 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
419 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
420 37d18660 Peter Maydell
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
{
    /* Inputs pass through untouched unless input flushing is enabled. */
    if (!STATUS(flush_inputs_to_zero)) {
        return a;
    }

    /* Only a zero exponent field with a non-zero fraction is squashed. */
    if (extractFloat64Exp(a) != 0 || extractFloat64Frac(a) == 0) {
        return a;
    }

    float_raise(float_flag_input_denormal STATUS_VAR);
    /* Keep only the sign bit, giving a zero of the same sign. */
    return make_float64(float64_val(a) & (1ULL << 63));
}
430 37d18660 Peter Maydell
431 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
432 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
433 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
434 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
435 158142c2 bellard
| `zSigPtr', respectively.
436 158142c2 bellard
*----------------------------------------------------------------------------*/
437 158142c2 bellard
438 158142c2 bellard
static void
439 bb98fe42 Andreas Färber
 normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
440 158142c2 bellard
{
441 158142c2 bellard
    int8 shiftCount;
442 158142c2 bellard
443 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
444 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
445 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
446 158142c2 bellard
447 158142c2 bellard
}
448 158142c2 bellard
449 158142c2 bellard
/*----------------------------------------------------------------------------
450 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
451 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
452 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
453 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
454 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
455 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
456 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
457 158142c2 bellard
| significand.
458 158142c2 bellard
*----------------------------------------------------------------------------*/
459 158142c2 bellard
460 bb98fe42 Andreas Färber
INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
{
    /* Position each field, then add (not OR) so that any carry out of
     * zSig propagates into the exponent field. */
    uint64_t signField = ( (uint64_t) zSign ) << 63;
    uint64_t expField = ( (uint64_t) zExp ) << 52;

    return make_float64( signField + expField + zSig );
}
467 158142c2 bellard
468 158142c2 bellard
/*----------------------------------------------------------------------------
469 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
470 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
471 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
472 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
473 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
474 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
475 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
476 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
477 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
478 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
479 158142c2 bellard
| precision floating-point number.
480 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
481 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
482 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
483 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
484 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
485 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
486 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
487 158142c2 bellard
| Binary Floating-Point Arithmetic.
488 158142c2 bellard
*----------------------------------------------------------------------------*/
489 158142c2 bellard
490 bb98fe42 Andreas Färber
static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
{
    const int8 mode = STATUS(float_rounding_mode);
    const flag nearestEven = ( mode == float_round_nearest_even );
    int16 addend;
    int16 lowBits;
    flag tiny;

    /* Choose the amount added to the 10 low bits that are shifted out at
     * the end: half of the discarded range for nearest-even, nothing when
     * truncating, and the full 0x3FF when the directed mode rounds away
     * from zero for this sign. */
    if ( nearestEven ) {
        addend = 0x200;
    }
    else if ( mode == float_round_to_zero ) {
        addend = 0;
    }
    else if ( zSign ) {
        addend = ( mode == float_round_up ) ? 0 : 0x3FF;
    }
    else {
        addend = ( mode == float_round_down ) ? 0 : 0x3FF;
    }
    lowBits = zSig & 0x3FF;

    /* One unsigned comparison selects both exceptional ranges: exponents
     * of 0x7FD and above, and negative exponents (which wrap to large
     * unsigned values). */
    if ( (uint16_t) zExp >= 0x7FD ) {
        if (    ( zExp > 0x7FD )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + addend ) < 0 ) ) ) {
            /* Too large.  The significand argument is 0 when the mode
             * rounds away from zero here, and -1 (all ones) when it does
             * not. */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat64( zSign, 0x7FF, - ( addend == 0 ) );
        }
        if ( zExp < 0 ) {
            if ( STATUS(flush_to_zero) ) {
                float_raise( float_flag_output_denormal STATUS_VAR);
                return packFloat64( zSign, 0, 0 );
            }
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + addend < LIT64( 0x8000000000000000 ) );
            /* Denormalize: move the significand right by -zExp places and
             * recompute the bits that will be discarded. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x3FF;
            if ( tiny && lowBits ) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }

    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = ( zSig + addend )>>10;
    /* Exact tie under nearest-even: force the result's low bit to zero. */
    if ( nearestEven && ( lowBits == 0x200 ) ) {
        zSig &= ~ (uint64_t) 1;
    }
    if ( zSig == 0 ) {
        zExp = 0;
    }
    return packFloat64( zSign, zExp, zSig );
}
545 158142c2 bellard
546 158142c2 bellard
/*----------------------------------------------------------------------------
547 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
548 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
549 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
550 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
551 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
552 158142c2 bellard
| floating-point exponent.
553 158142c2 bellard
*----------------------------------------------------------------------------*/
554 158142c2 bellard
555 158142c2 bellard
static float64
556 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
557 158142c2 bellard
{
558 158142c2 bellard
    int8 shiftCount;
559 158142c2 bellard
560 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
561 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
562 158142c2 bellard
563 158142c2 bellard
}
564 158142c2 bellard
565 158142c2 bellard
/*----------------------------------------------------------------------------
566 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
567 158142c2 bellard
| value `a'.
568 158142c2 bellard
*----------------------------------------------------------------------------*/
569 158142c2 bellard
570 bb98fe42 Andreas Färber
INLINE uint64_t extractFloatx80Frac( floatx80 a )
{
    /* The significand is held in the `low' member. */
    const uint64_t frac = a.low;

    return frac;
}
576 158142c2 bellard
577 158142c2 bellard
/*----------------------------------------------------------------------------
578 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
579 158142c2 bellard
| value `a'.
580 158142c2 bellard
*----------------------------------------------------------------------------*/
581 158142c2 bellard
582 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
{
    /* The exponent is the low 15 bits of the `high' member. */
    const int32 expMask = 0x7FFF;

    return a.high & expMask;
}
588 158142c2 bellard
589 158142c2 bellard
/*----------------------------------------------------------------------------
590 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
591 158142c2 bellard
| `a'.
592 158142c2 bellard
*----------------------------------------------------------------------------*/
593 158142c2 bellard
594 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
{
    /* The sign is whatever lies above the 15 exponent bits of `high'. */
    const flag sign = a.high>>15;

    return sign;
}
600 158142c2 bellard
601 158142c2 bellard
/*----------------------------------------------------------------------------
602 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
603 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
604 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
605 158142c2 bellard
| `zSigPtr', respectively.
606 158142c2 bellard
*----------------------------------------------------------------------------*/
607 158142c2 bellard
608 158142c2 bellard
static void
609 bb98fe42 Andreas Färber
 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
610 158142c2 bellard
{
611 158142c2 bellard
    int8 shiftCount;
612 158142c2 bellard
613 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
614 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
615 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
616 158142c2 bellard
617 158142c2 bellard
}
618 158142c2 bellard
619 158142c2 bellard
/*----------------------------------------------------------------------------
620 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
621 158142c2 bellard
| extended double-precision floating-point value, returning the result.
622 158142c2 bellard
*----------------------------------------------------------------------------*/
623 158142c2 bellard
624 bb98fe42 Andreas Färber
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
{
    const uint16_t signBit = (uint16_t) zSign;
    floatx80 packed;

    /* Sign goes at bit 15 of `high'; the exponent is added below it. */
    packed.low = zSig;
    packed.high = ( signBit<<15 ) + zExp;
    return packed;
}
633 158142c2 bellard
634 158142c2 bellard
/*----------------------------------------------------------------------------
635 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
636 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
637 158142c2 bellard
| and returns the proper extended double-precision floating-point value
638 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
639 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
640 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
641 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
642 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
643 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
644 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
645 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
646 158142c2 bellard
| double-precision floating-point number.
647 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
648 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
649 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
650 158142c2 bellard
| format.
651 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
652 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
653 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
654 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
655 158142c2 bellard
| Floating-Point Arithmetic.
656 158142c2 bellard
*----------------------------------------------------------------------------*/
657 158142c2 bellard
658 158142c2 bellard
/* Rounds the abstract value (zSign, zExp, zSig0:zSig1) and packs it as a
 * floatx80.  There are two code paths: a roundingPrecision of 64 or 32
 * rounds inside zSig0 using a bit mask, while any other value (80 included)
 * jumps to `precision80' and rounds on the basis of zSig1.  Both paths share
 * the `overflow' exit, which lives inside the precision80 path. */
static floatx80
659 158142c2 bellard
 roundAndPackFloatx80(
660 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
661 158142c2 bellard
 STATUS_PARAM)
662 158142c2 bellard
{
663 158142c2 bellard
    int8 roundingMode;
664 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
665 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
666 158142c2 bellard
667 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
668 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
669 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
670 158142c2 bellard
    /* roundMask covers the bits of zSig0 that are discarded (11 bits for
     * precision 64, 40 bits for precision 32); roundIncrement starts as
     * half of that range, i.e. the round-to-nearest addend. */
    if ( roundingPrecision == 64 ) {
671 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
672 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
673 158142c2 bellard
    }
674 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
675 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
676 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
677 158142c2 bellard
    }
678 158142c2 bellard
    else {
679 158142c2 bellard
        goto precision80;
680 158142c2 bellard
    }
681 158142c2 bellard
    /* Fold any nonzero bits of zSig1 into bit 0 of zSig0 so they still
     * count as discarded bits. */
    zSig0 |= ( zSig1 != 0 );
682 158142c2 bellard
    /* Directed modes: add nothing when rounding toward zero for this sign,
     * otherwise add the whole mask so any discarded bit carries upward. */
    if ( ! roundNearestEven ) {
683 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
684 158142c2 bellard
            roundIncrement = 0;
685 158142c2 bellard
        }
686 158142c2 bellard
        else {
687 158142c2 bellard
            roundIncrement = roundMask;
688 158142c2 bellard
            if ( zSign ) {
689 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
690 158142c2 bellard
            }
691 158142c2 bellard
            else {
692 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
693 158142c2 bellard
            }
694 158142c2 bellard
        }
695 158142c2 bellard
    }
696 158142c2 bellard
    roundBits = zSig0 & roundMask;
697 bb98fe42 Andreas Färber
    /* Unsigned compare: true when zExp >= 0x7FFE, and also when zExp <= 0
     * because zExp - 1 is then negative and wraps to a large value. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
698 158142c2 bellard
        /* `zSig0 + roundIncrement < zSig0' detects a carry out of bit 63. */
        if (    ( 0x7FFE < zExp )
699 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
700 158142c2 bellard
           ) {
701 158142c2 bellard
            /* Jumps into the precision80 path; roundMask keeps its current
             * value and is used there to build the returned significand. */
            goto overflow;
702 158142c2 bellard
        }
703 158142c2 bellard
        if ( zExp <= 0 ) {
704 e6afc87f Peter Maydell
            /* Flush-to-zero: return a signed zero and raise
             * float_flag_output_denormal instead of producing a subnormal. */
            if (STATUS(flush_to_zero)) {
705 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
706 e6afc87f Peter Maydell
                return packFloatx80(zSign, 0, 0);
707 e6afc87f Peter Maydell
            }
708 158142c2 bellard
            isTiny =
709 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
710 158142c2 bellard
                || ( zExp < 0 )
711 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
712 158142c2 bellard
            /* Denormalize by 1 - zExp places (helper defined elsewhere;
             * presumably a sticky right shift -- confirm), then recompute
             * the discarded bits. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
713 158142c2 bellard
            zExp = 0;
714 158142c2 bellard
            roundBits = zSig0 & roundMask;
715 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
716 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
717 158142c2 bellard
            zSig0 += roundIncrement;
718 bb98fe42 Andreas Färber
            /* If bit 63 is now set, the exponent field becomes 1. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
719 158142c2 bellard
            /* roundMask + 1 is the lowest kept bit.  When the discarded
             * bits were exactly half of it (a tie) under nearest-even,
             * widen the mask so that bit is cleared as well. */
            roundIncrement = roundMask + 1;
720 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
721 158142c2 bellard
                roundMask |= roundIncrement;
722 158142c2 bellard
            }
723 158142c2 bellard
            zSig0 &= ~ roundMask;
724 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
725 158142c2 bellard
        }
726 158142c2 bellard
    }
727 158142c2 bellard
    /* Ordinary reduced-precision case. */
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
728 158142c2 bellard
    zSig0 += roundIncrement;
729 158142c2 bellard
    /* The addition wrapped past bit 63: bump the exponent and set the
     * significand to just its top bit. */
    if ( zSig0 < roundIncrement ) {
730 158142c2 bellard
        ++zExp;
731 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
732 158142c2 bellard
    }
733 158142c2 bellard
    /* Same tie handling as in the subnormal branch above. */
    roundIncrement = roundMask + 1;
734 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
735 158142c2 bellard
        roundMask |= roundIncrement;
736 158142c2 bellard
    }
737 158142c2 bellard
    zSig0 &= ~ roundMask;
738 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
739 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
740 158142c2 bellard
 precision80:
741 bb98fe42 Andreas Färber
    /* Full-precision path: zSig0 is kept whole and zSig1 holds the bits
     * below it.  Nearest-even increments when the top bit of zSig1 is set;
     * directed modes increment when zSig1 is nonzero and the mode rounds
     * away from zero for this sign. */
    increment = ( (int64_t) zSig1 < 0 );
742 158142c2 bellard
    if ( ! roundNearestEven ) {
743 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
744 158142c2 bellard
            increment = 0;
745 158142c2 bellard
        }
746 158142c2 bellard
        else {
747 158142c2 bellard
            if ( zSign ) {
748 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
749 158142c2 bellard
            }
750 158142c2 bellard
            else {
751 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
752 158142c2 bellard
            }
753 158142c2 bellard
        }
754 158142c2 bellard
    }
755 bb98fe42 Andreas Färber
    /* Same range test as in the reduced-precision path. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
756 158142c2 bellard
        if (    ( 0x7FFE < zExp )
757 158142c2 bellard
             || (    ( zExp == 0x7FFE )
758 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
759 158142c2 bellard
                  && increment
760 158142c2 bellard
                )
761 158142c2 bellard
           ) {
762 158142c2 bellard
            /* With roundMask = 0, ~roundMask below is an all-ones
             * significand. */
            roundMask = 0;
763 158142c2 bellard
 overflow:
764 158142c2 bellard
            /* Also reached by `goto overflow' from the reduced-precision
             * path, with that path's roundMask still in effect. */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
765 158142c2 bellard
            /* Modes that round toward zero for this sign return exponent
             * 0x7FFE with every kept significand bit set; all other cases
             * return exponent 0x7FFF with only the top significand bit. */
            if (    ( roundingMode == float_round_to_zero )
766 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
767 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
768 158142c2 bellard
               ) {
769 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
770 158142c2 bellard
            }
771 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
772 158142c2 bellard
        }
773 158142c2 bellard
        /* NOTE(review): unlike the reduced-precision subnormal branch, this
         * branch does not consult STATUS(flush_to_zero) -- confirm whether
         * that is intentional. */
        if ( zExp <= 0 ) {
774 158142c2 bellard
            isTiny =
775 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
776 158142c2 bellard
                || ( zExp < 0 )
777 158142c2 bellard
                || ! increment
778 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
779 158142c2 bellard
            /* Denormalize the 128-bit pair by 1 - zExp places (helper
             * defined elsewhere). */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
780 158142c2 bellard
            zExp = 0;
781 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
782 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
783 158142c2 bellard
            /* Re-derive the rounding decision from the shifted zSig1.  In
             * round-to-zero mode neither directed test matches, so
             * increment ends up 0. */
            if ( roundNearestEven ) {
784 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig1 < 0 );
785 158142c2 bellard
            }
786 158142c2 bellard
            else {
787 158142c2 bellard
                if ( zSign ) {
788 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
789 158142c2 bellard
                }
790 158142c2 bellard
                else {
791 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
792 158142c2 bellard
                }
793 158142c2 bellard
            }
794 158142c2 bellard
            if ( increment ) {
795 158142c2 bellard
                ++zSig0;
796 158142c2 bellard
                /* zSig1<<1 == 0 means only the top bit of zSig1 was set (an
                 * exact tie): under nearest-even, clear the low bit. */
                zSig0 &=
797 bb98fe42 Andreas Färber
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
798 bb98fe42 Andreas Färber
                /* If bit 63 is now set, the exponent field becomes 1. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
799 158142c2 bellard
            }
800 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
801 158142c2 bellard
        }
802 158142c2 bellard
    }
803 158142c2 bellard
    /* Ordinary full-precision case. */
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
804 158142c2 bellard
    if ( increment ) {
805 158142c2 bellard
        ++zSig0;
806 158142c2 bellard
        /* Wrapped to zero: bump the exponent and set the significand to
         * just its top bit. */
        if ( zSig0 == 0 ) {
807 158142c2 bellard
            ++zExp;
808 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
809 158142c2 bellard
        }
810 158142c2 bellard
        else {
811 bb98fe42 Andreas Färber
            /* Exact tie under nearest-even: clear the low bit. */
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
812 158142c2 bellard
        }
813 158142c2 bellard
    }
814 158142c2 bellard
    else {
815 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
816 158142c2 bellard
    }
817 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
818 158142c2 bellard
819 158142c2 bellard
}
820 158142c2 bellard
821 158142c2 bellard
/*----------------------------------------------------------------------------
822 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
823 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
824 158142c2 bellard
| and returns the proper extended double-precision floating-point value
825 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
826 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
827 158142c2 bellard
| normalized.
828 158142c2 bellard
*----------------------------------------------------------------------------*/
829 158142c2 bellard
830 158142c2 bellard
static floatx80
831 158142c2 bellard
 normalizeRoundAndPackFloatx80(
832 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
833 158142c2 bellard
 STATUS_PARAM)
834 158142c2 bellard
{
835 158142c2 bellard
    int8 shiftCount;
836 158142c2 bellard
837 158142c2 bellard
    if ( zSig0 == 0 ) {
838 158142c2 bellard
        zSig0 = zSig1;
839 158142c2 bellard
        zSig1 = 0;
840 158142c2 bellard
        zExp -= 64;
841 158142c2 bellard
    }
842 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
843 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
844 158142c2 bellard
    zExp -= shiftCount;
845 158142c2 bellard
    return
846 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
847 158142c2 bellard
848 158142c2 bellard
}
849 158142c2 bellard
850 158142c2 bellard
/*----------------------------------------------------------------------------
851 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
852 158142c2 bellard
| floating-point value `a'.
853 158142c2 bellard
*----------------------------------------------------------------------------*/
854 158142c2 bellard
855 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac1( float128 a )
{
    /* The low-order fraction bits are the whole `low' member. */
    const uint64_t lowFrac = a.low;

    return lowFrac;
}
861 158142c2 bellard
862 158142c2 bellard
/*----------------------------------------------------------------------------
863 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
864 158142c2 bellard
| floating-point value `a'.
865 158142c2 bellard
*----------------------------------------------------------------------------*/
866 158142c2 bellard
867 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac0( float128 a )
{
    /* Keep the low 48 bits of `high'; the bits above them are masked off. */
    const uint64_t fracMask = LIT64( 0x0000FFFFFFFFFFFF );

    return a.high & fracMask;
}
873 158142c2 bellard
874 158142c2 bellard
/*----------------------------------------------------------------------------
875 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
876 158142c2 bellard
| `a'.
877 158142c2 bellard
*----------------------------------------------------------------------------*/
878 158142c2 bellard
879 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
{
    /* Drop the 48 fraction bits of `high', then keep the next 15 bits. */
    const uint64_t aboveFraction = a.high>>48;

    return aboveFraction & 0x7FFF;
}
885 158142c2 bellard
886 158142c2 bellard
/*----------------------------------------------------------------------------
887 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
888 158142c2 bellard
*----------------------------------------------------------------------------*/
889 158142c2 bellard
890 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
{
    /* The sign is the bit of `high' that remains after shifting out 63 bits. */
    const flag sign = a.high>>63;

    return sign;
}
896 158142c2 bellard
897 158142c2 bellard
/*----------------------------------------------------------------------------
898 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
899 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
900 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
901 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
902 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
903 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
904 158142c2 bellard
| location pointed to by `zSig1Ptr'.
905 158142c2 bellard
*----------------------------------------------------------------------------*/
906 158142c2 bellard
907 158142c2 bellard
static void
908 158142c2 bellard
 normalizeFloat128Subnormal(
909 bb98fe42 Andreas Färber
     uint64_t aSig0,
910 bb98fe42 Andreas Färber
     uint64_t aSig1,
911 158142c2 bellard
     int32 *zExpPtr,
912 bb98fe42 Andreas Färber
     uint64_t *zSig0Ptr,
913 bb98fe42 Andreas Färber
     uint64_t *zSig1Ptr
914 158142c2 bellard
 )
915 158142c2 bellard
{
916 158142c2 bellard
    int8 shiftCount;
917 158142c2 bellard
918 158142c2 bellard
    if ( aSig0 == 0 ) {
919 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
920 158142c2 bellard
        if ( shiftCount < 0 ) {
921 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
922 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
923 158142c2 bellard
        }
924 158142c2 bellard
        else {
925 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
926 158142c2 bellard
            *zSig1Ptr = 0;
927 158142c2 bellard
        }
928 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
929 158142c2 bellard
    }
930 158142c2 bellard
    else {
931 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
932 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
933 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
934 158142c2 bellard
    }
935 158142c2 bellard
936 158142c2 bellard
}
937 158142c2 bellard
938 158142c2 bellard
/*----------------------------------------------------------------------------
939 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
940 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
941 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
942 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
943 158142c2 bellard
| added together to form the most significant 64 bits of the result.  This
944 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
945 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
946 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
947 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
948 158142c2 bellard
| significand.
949 158142c2 bellard
*----------------------------------------------------------------------------*/
950 158142c2 bellard
951 158142c2 bellard
INLINE float128
952 bb98fe42 Andreas Färber
 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
953 158142c2 bellard
{
954 158142c2 bellard
    float128 z;
955 158142c2 bellard
956 158142c2 bellard
    z.low = zSig1;
957 bb98fe42 Andreas Färber
    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
958 158142c2 bellard
    return z;
959 158142c2 bellard
960 158142c2 bellard
}
961 158142c2 bellard
962 158142c2 bellard
/*----------------------------------------------------------------------------
963 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
964 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
965 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
966 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
967 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
968 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
969 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
970 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
971 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
972 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
973 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
974 158142c2 bellard
| precision floating-point number.
975 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
976 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
977 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
978 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
979 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
980 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
981 158142c2 bellard
*----------------------------------------------------------------------------*/
982 158142c2 bellard
983 158142c2 bellard
static float128
984 158142c2 bellard
 roundAndPackFloat128(
985 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
986 158142c2 bellard
{
987 158142c2 bellard
    int8 roundingMode;
988 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
989 158142c2 bellard
990 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
991 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
992 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig2 < 0 );
993 158142c2 bellard
    if ( ! roundNearestEven ) {
994 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
995 158142c2 bellard
            increment = 0;
996 158142c2 bellard
        }
997 158142c2 bellard
        else {
998 158142c2 bellard
            if ( zSign ) {
999 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
1000 158142c2 bellard
            }
1001 158142c2 bellard
            else {
1002 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
1003 158142c2 bellard
            }
1004 158142c2 bellard
        }
1005 158142c2 bellard
    }
1006 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) zExp ) {
1007 158142c2 bellard
        if (    ( 0x7FFD < zExp )
1008 158142c2 bellard
             || (    ( zExp == 0x7FFD )
1009 158142c2 bellard
                  && eq128(
1010 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
1011 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1012 158142c2 bellard
                         zSig0,
1013 158142c2 bellard
                         zSig1
1014 158142c2 bellard
                     )
1015 158142c2 bellard
                  && increment
1016 158142c2 bellard
                )
1017 158142c2 bellard
           ) {
1018 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1019 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
1020 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
1021 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
1022 158142c2 bellard
               ) {
1023 158142c2 bellard
                return
1024 158142c2 bellard
                    packFloat128(
1025 158142c2 bellard
                        zSign,
1026 158142c2 bellard
                        0x7FFE,
1027 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
1028 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
1029 158142c2 bellard
                    );
1030 158142c2 bellard
            }
1031 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
1032 158142c2 bellard
        }
1033 158142c2 bellard
        if ( zExp < 0 ) {
1034 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1035 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
1036 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
1037 e6afc87f Peter Maydell
            }
1038 158142c2 bellard
            isTiny =
1039 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1040 158142c2 bellard
                || ( zExp < -1 )
1041 158142c2 bellard
                || ! increment
1042 158142c2 bellard
                || lt128(
1043 158142c2 bellard
                       zSig0,
1044 158142c2 bellard
                       zSig1,
1045 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
1046 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
1047 158142c2 bellard
                   );
1048 158142c2 bellard
            shift128ExtraRightJamming(
1049 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1050 158142c2 bellard
            zExp = 0;
1051 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1052 158142c2 bellard
            if ( roundNearestEven ) {
1053 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig2 < 0 );
1054 158142c2 bellard
            }
1055 158142c2 bellard
            else {
1056 158142c2 bellard
                if ( zSign ) {
1057 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
1058 158142c2 bellard
                }
1059 158142c2 bellard
                else {
1060 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
1061 158142c2 bellard
                }
1062 158142c2 bellard
            }
1063 158142c2 bellard
        }
1064 158142c2 bellard
    }
1065 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1066 158142c2 bellard
    if ( increment ) {
1067 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1068 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1069 158142c2 bellard
    }
1070 158142c2 bellard
    else {
1071 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1072 158142c2 bellard
    }
1073 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1074 158142c2 bellard
1075 158142c2 bellard
}
1076 158142c2 bellard
1077 158142c2 bellard
/*----------------------------------------------------------------------------
1078 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1079 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1080 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1081 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1082 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1083 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1084 158142c2 bellard
| point exponent.
1085 158142c2 bellard
*----------------------------------------------------------------------------*/
1086 158142c2 bellard
1087 158142c2 bellard
static float128
1088 158142c2 bellard
 normalizeRoundAndPackFloat128(
1089 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1090 158142c2 bellard
{
1091 158142c2 bellard
    int8 shiftCount;
1092 bb98fe42 Andreas Färber
    uint64_t zSig2;
1093 158142c2 bellard
1094 158142c2 bellard
    if ( zSig0 == 0 ) {
1095 158142c2 bellard
        zSig0 = zSig1;
1096 158142c2 bellard
        zSig1 = 0;
1097 158142c2 bellard
        zExp -= 64;
1098 158142c2 bellard
    }
1099 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1100 158142c2 bellard
    if ( 0 <= shiftCount ) {
1101 158142c2 bellard
        zSig2 = 0;
1102 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1103 158142c2 bellard
    }
1104 158142c2 bellard
    else {
1105 158142c2 bellard
        shift128ExtraRightJamming(
1106 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1107 158142c2 bellard
    }
1108 158142c2 bellard
    zExp -= shiftCount;
1109 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1110 158142c2 bellard
1111 158142c2 bellard
}
1112 158142c2 bellard
1113 158142c2 bellard
/*----------------------------------------------------------------------------
1114 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1115 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1116 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1117 158142c2 bellard
*----------------------------------------------------------------------------*/
1118 158142c2 bellard
1119 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
1120 158142c2 bellard
{
1121 158142c2 bellard
    flag zSign;
1122 158142c2 bellard
1123 f090c9d4 pbrook
    if ( a == 0 ) return float32_zero;
1124 bb98fe42 Andreas Färber
    if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1125 158142c2 bellard
    zSign = ( a < 0 );
1126 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1127 158142c2 bellard
1128 158142c2 bellard
}
1129 158142c2 bellard
1130 158142c2 bellard
/*----------------------------------------------------------------------------
1131 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1132 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1133 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1134 158142c2 bellard
*----------------------------------------------------------------------------*/
1135 158142c2 bellard
1136 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
1137 158142c2 bellard
{
1138 158142c2 bellard
    flag zSign;
1139 158142c2 bellard
    uint32 absA;
1140 158142c2 bellard
    int8 shiftCount;
1141 bb98fe42 Andreas Färber
    uint64_t zSig;
1142 158142c2 bellard
1143 f090c9d4 pbrook
    if ( a == 0 ) return float64_zero;
1144 158142c2 bellard
    zSign = ( a < 0 );
1145 158142c2 bellard
    absA = zSign ? - a : a;
1146 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 21;
1147 158142c2 bellard
    zSig = absA;
1148 158142c2 bellard
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1149 158142c2 bellard
1150 158142c2 bellard
}
1151 158142c2 bellard
1152 158142c2 bellard
/*----------------------------------------------------------------------------
1153 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1154 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1155 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1156 158142c2 bellard
| Arithmetic.
1157 158142c2 bellard
*----------------------------------------------------------------------------*/
1158 158142c2 bellard
1159 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1160 158142c2 bellard
{
1161 158142c2 bellard
    flag zSign;
1162 158142c2 bellard
    uint32 absA;
1163 158142c2 bellard
    int8 shiftCount;
1164 bb98fe42 Andreas Färber
    uint64_t zSig;
1165 158142c2 bellard
1166 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1167 158142c2 bellard
    zSign = ( a < 0 );
1168 158142c2 bellard
    absA = zSign ? - a : a;
1169 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 32;
1170 158142c2 bellard
    zSig = absA;
1171 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1172 158142c2 bellard
1173 158142c2 bellard
}
1174 158142c2 bellard
1175 158142c2 bellard
/*----------------------------------------------------------------------------
1176 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1177 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1178 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1179 158142c2 bellard
*----------------------------------------------------------------------------*/
1180 158142c2 bellard
1181 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
1182 158142c2 bellard
{
1183 158142c2 bellard
    flag zSign;
1184 158142c2 bellard
    uint32 absA;
1185 158142c2 bellard
    int8 shiftCount;
1186 bb98fe42 Andreas Färber
    uint64_t zSig0;
1187 158142c2 bellard
1188 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1189 158142c2 bellard
    zSign = ( a < 0 );
1190 158142c2 bellard
    absA = zSign ? - a : a;
1191 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 17;
1192 158142c2 bellard
    zSig0 = absA;
1193 158142c2 bellard
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1194 158142c2 bellard
1195 158142c2 bellard
}
1196 158142c2 bellard
1197 158142c2 bellard
/*----------------------------------------------------------------------------
1198 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1199 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1200 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1201 158142c2 bellard
*----------------------------------------------------------------------------*/
1202 158142c2 bellard
1203 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
1204 158142c2 bellard
{
1205 158142c2 bellard
    flag zSign;
1206 158142c2 bellard
    uint64 absA;
1207 158142c2 bellard
    int8 shiftCount;
1208 158142c2 bellard
1209 f090c9d4 pbrook
    if ( a == 0 ) return float32_zero;
1210 158142c2 bellard
    zSign = ( a < 0 );
1211 158142c2 bellard
    absA = zSign ? - a : a;
1212 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) - 40;
1213 158142c2 bellard
    if ( 0 <= shiftCount ) {
1214 158142c2 bellard
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1215 158142c2 bellard
    }
1216 158142c2 bellard
    else {
1217 158142c2 bellard
        shiftCount += 7;
1218 158142c2 bellard
        if ( shiftCount < 0 ) {
1219 158142c2 bellard
            shift64RightJamming( absA, - shiftCount, &absA );
1220 158142c2 bellard
        }
1221 158142c2 bellard
        else {
1222 158142c2 bellard
            absA <<= shiftCount;
1223 158142c2 bellard
        }
1224 158142c2 bellard
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1225 158142c2 bellard
    }
1226 158142c2 bellard
1227 158142c2 bellard
}
1228 158142c2 bellard
1229 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  Values that do not fit the
| significand are rounded by `roundAndPackFloat32'.  The result is never
| negative.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* Sign is 0: an unsigned input is never negative.  (The sign
           argument used to be `1 > 0', i.e. 1, which produced negative
           results for every nonzero input.) */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            /* Too wide: shift right, folding the lost bits into the sticky
               low bit so rounding still sees them. */
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1249 75d62a58 j_mayer
1250 158142c2 bellard
/*----------------------------------------------------------------------------
1251 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1252 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1253 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1254 158142c2 bellard
*----------------------------------------------------------------------------*/
1255 158142c2 bellard
1256 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
1257 158142c2 bellard
{
1258 158142c2 bellard
    flag zSign;
1259 158142c2 bellard
1260 f090c9d4 pbrook
    if ( a == 0 ) return float64_zero;
1261 bb98fe42 Andreas Färber
    if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1262 158142c2 bellard
        return packFloat64( 1, 0x43E, 0 );
1263 158142c2 bellard
    }
1264 158142c2 bellard
    zSign = ( a < 0 );
1265 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1266 158142c2 bellard
1267 158142c2 bellard
}
1268 158142c2 bellard
1269 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int zExp = 0x43C;

    if ( a == 0 ) return float64_zero;
    /* NOTE(review): normalizeRoundAndPackFloat64 is defined elsewhere in
       this file; it presumably shifts the significand left by
       (leading zeros - 1), which would be a shift by -1 when bit 63 of `a'
       is set -- confirm against its definition.  To stay within its input
       range, such values are first shifted right by one (keeping the lost
       bit as a sticky bit) and the exponent is raised to compensate. */
    if ( (int64_t) a < 0 ) {
        shift64RightJamming( a, 1, &a );
        zExp += 1;
    }
    return normalizeRoundAndPackFloat64( 0, zExp, a STATUS_VAR );

}
1275 75d62a58 j_mayer
1276 158142c2 bellard
/*----------------------------------------------------------------------------
1277 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1278 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1279 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1280 158142c2 bellard
| Arithmetic.
1281 158142c2 bellard
*----------------------------------------------------------------------------*/
1282 158142c2 bellard
1283 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1284 158142c2 bellard
{
1285 158142c2 bellard
    flag zSign;
1286 158142c2 bellard
    uint64 absA;
1287 158142c2 bellard
    int8 shiftCount;
1288 158142c2 bellard
1289 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1290 158142c2 bellard
    zSign = ( a < 0 );
1291 158142c2 bellard
    absA = zSign ? - a : a;
1292 158142c2 bellard
    shiftCount = countLeadingZeros64( absA );
1293 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1294 158142c2 bellard
1295 158142c2 bellard
}
1296 158142c2 bellard
1297 158142c2 bellard
/*----------------------------------------------------------------------------
1298 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1299 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1300 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1301 158142c2 bellard
*----------------------------------------------------------------------------*/
1302 158142c2 bellard
1303 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
1304 158142c2 bellard
{
1305 158142c2 bellard
    flag zSign;
1306 158142c2 bellard
    uint64 absA;
1307 158142c2 bellard
    int8 shiftCount;
1308 158142c2 bellard
    int32 zExp;
1309 bb98fe42 Andreas Färber
    uint64_t zSig0, zSig1;
1310 158142c2 bellard
1311 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1312 158142c2 bellard
    zSign = ( a < 0 );
1313 158142c2 bellard
    absA = zSign ? - a : a;
1314 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) + 49;
1315 158142c2 bellard
    zExp = 0x406E - shiftCount;
1316 158142c2 bellard
    if ( 64 <= shiftCount ) {
1317 158142c2 bellard
        zSig1 = 0;
1318 158142c2 bellard
        zSig0 = absA;
1319 158142c2 bellard
        shiftCount -= 64;
1320 158142c2 bellard
    }
1321 158142c2 bellard
    else {
1322 158142c2 bellard
        zSig1 = absA;
1323 158142c2 bellard
        zSig0 = 0;
1324 158142c2 bellard
    }
1325 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1326 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1327 158142c2 bellard
1328 158142c2 bellard
}
1329 158142c2 bellard
1330 158142c2 bellard
/*----------------------------------------------------------------------------
1331 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1332 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1333 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1334 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1335 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1336 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1337 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1338 158142c2 bellard
*----------------------------------------------------------------------------*/
1339 158142c2 bellard
1340 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
1341 158142c2 bellard
{
1342 158142c2 bellard
    flag aSign;
1343 158142c2 bellard
    int16 aExp, shiftCount;
1344 bb98fe42 Andreas Färber
    uint32_t aSig;
1345 bb98fe42 Andreas Färber
    uint64_t aSig64;
1346 158142c2 bellard
1347 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1348 158142c2 bellard
    aSig = extractFloat32Frac( a );
1349 158142c2 bellard
    aExp = extractFloat32Exp( a );
1350 158142c2 bellard
    aSign = extractFloat32Sign( a );
1351 158142c2 bellard
    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1352 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1353 158142c2 bellard
    shiftCount = 0xAF - aExp;
1354 158142c2 bellard
    aSig64 = aSig;
1355 158142c2 bellard
    aSig64 <<= 32;
1356 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1357 158142c2 bellard
    return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1358 158142c2 bellard
1359 158142c2 bellard
}
1360 158142c2 bellard
1361 158142c2 bellard
/*----------------------------------------------------------------------------
1362 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1363 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1364 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1365 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1366 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1367 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1368 158142c2 bellard
| returned.
1369 158142c2 bellard
*----------------------------------------------------------------------------*/
1370 158142c2 bellard
1371 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1372 158142c2 bellard
{
1373 158142c2 bellard
    flag aSign;
1374 158142c2 bellard
    int16 aExp, shiftCount;
1375 bb98fe42 Andreas Färber
    uint32_t aSig;
1376 158142c2 bellard
    int32 z;
1377 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1378 158142c2 bellard
1379 158142c2 bellard
    aSig = extractFloat32Frac( a );
1380 158142c2 bellard
    aExp = extractFloat32Exp( a );
1381 158142c2 bellard
    aSign = extractFloat32Sign( a );
1382 158142c2 bellard
    shiftCount = aExp - 0x9E;
1383 158142c2 bellard
    if ( 0 <= shiftCount ) {
1384 f090c9d4 pbrook
        if ( float32_val(a) != 0xCF000000 ) {
1385 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1386 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1387 158142c2 bellard
        }
1388 bb98fe42 Andreas Färber
        return (int32_t) 0x80000000;
1389 158142c2 bellard
    }
1390 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1391 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1392 158142c2 bellard
        return 0;
1393 158142c2 bellard
    }
1394 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
1395 158142c2 bellard
    z = aSig>>( - shiftCount );
1396 bb98fe42 Andreas Färber
    if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1397 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1398 158142c2 bellard
    }
1399 158142c2 bellard
    if ( aSign ) z = - z;
1400 158142c2 bellard
    return z;
1401 158142c2 bellard
1402 158142c2 bellard
}
1403 158142c2 bellard
1404 158142c2 bellard
/*----------------------------------------------------------------------------
1405 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1406 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
1407 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1408 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
1409 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1410 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
1411 cbcef455 Peter Maydell
| returned.
1412 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
1413 cbcef455 Peter Maydell
1414 cbcef455 Peter Maydell
int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
1415 cbcef455 Peter Maydell
{
1416 cbcef455 Peter Maydell
    flag aSign;
1417 cbcef455 Peter Maydell
    int16 aExp, shiftCount;
1418 bb98fe42 Andreas Färber
    uint32_t aSig;
1419 cbcef455 Peter Maydell
    int32 z;
1420 cbcef455 Peter Maydell
1421 cbcef455 Peter Maydell
    aSig = extractFloat32Frac( a );
1422 cbcef455 Peter Maydell
    aExp = extractFloat32Exp( a );
1423 cbcef455 Peter Maydell
    aSign = extractFloat32Sign( a );
1424 cbcef455 Peter Maydell
    shiftCount = aExp - 0x8E;
1425 cbcef455 Peter Maydell
    if ( 0 <= shiftCount ) {
1426 cbcef455 Peter Maydell
        if ( float32_val(a) != 0xC7000000 ) {
1427 cbcef455 Peter Maydell
            float_raise( float_flag_invalid STATUS_VAR);
1428 cbcef455 Peter Maydell
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1429 cbcef455 Peter Maydell
                return 0x7FFF;
1430 cbcef455 Peter Maydell
            }
1431 cbcef455 Peter Maydell
        }
1432 bb98fe42 Andreas Färber
        return (int32_t) 0xffff8000;
1433 cbcef455 Peter Maydell
    }
1434 cbcef455 Peter Maydell
    else if ( aExp <= 0x7E ) {
1435 cbcef455 Peter Maydell
        if ( aExp | aSig ) {
1436 cbcef455 Peter Maydell
            STATUS(float_exception_flags) |= float_flag_inexact;
1437 cbcef455 Peter Maydell
        }
1438 cbcef455 Peter Maydell
        return 0;
1439 cbcef455 Peter Maydell
    }
1440 cbcef455 Peter Maydell
    shiftCount -= 0x10;
1441 cbcef455 Peter Maydell
    aSig = ( aSig | 0x00800000 )<<8;
1442 cbcef455 Peter Maydell
    z = aSig>>( - shiftCount );
1443 bb98fe42 Andreas Färber
    if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1444 cbcef455 Peter Maydell
        STATUS(float_exception_flags) |= float_flag_inexact;
1445 cbcef455 Peter Maydell
    }
1446 cbcef455 Peter Maydell
    if ( aSign ) {
1447 cbcef455 Peter Maydell
        z = - z;
1448 cbcef455 Peter Maydell
    }
1449 cbcef455 Peter Maydell
    return z;
1450 cbcef455 Peter Maydell
1451 cbcef455 Peter Maydell
}
1452 cbcef455 Peter Maydell
1453 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
1454 cbcef455 Peter Maydell
| Returns the result of converting the single-precision floating-point value
1455 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1456 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1457 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1458 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1459 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1460 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1461 158142c2 bellard
*----------------------------------------------------------------------------*/
1462 158142c2 bellard
1463 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
1464 158142c2 bellard
{
1465 158142c2 bellard
    flag aSign;
1466 158142c2 bellard
    int16 aExp, shiftCount;
1467 bb98fe42 Andreas Färber
    uint32_t aSig;
1468 bb98fe42 Andreas Färber
    uint64_t aSig64, aSigExtra;
1469 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1470 158142c2 bellard
1471 158142c2 bellard
    aSig = extractFloat32Frac( a );
1472 158142c2 bellard
    aExp = extractFloat32Exp( a );
1473 158142c2 bellard
    aSign = extractFloat32Sign( a );
1474 158142c2 bellard
    shiftCount = 0xBE - aExp;
1475 158142c2 bellard
    if ( shiftCount < 0 ) {
1476 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1477 158142c2 bellard
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1478 158142c2 bellard
            return LIT64( 0x7FFFFFFFFFFFFFFF );
1479 158142c2 bellard
        }
1480 bb98fe42 Andreas Färber
        return (int64_t) LIT64( 0x8000000000000000 );
1481 158142c2 bellard
    }
1482 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1483 158142c2 bellard
    aSig64 = aSig;
1484 158142c2 bellard
    aSig64 <<= 40;
1485 158142c2 bellard
    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1486 158142c2 bellard
    return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1487 158142c2 bellard
1488 158142c2 bellard
}
1489 158142c2 bellard
1490 158142c2 bellard
/*----------------------------------------------------------------------------
1491 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1492 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1493 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1494 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1495 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1496 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1497 158142c2 bellard
| returned.
1498 158142c2 bellard
*----------------------------------------------------------------------------*/
1499 158142c2 bellard
1500 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1501 158142c2 bellard
{
1502 158142c2 bellard
    flag aSign;
1503 158142c2 bellard
    int16 aExp, shiftCount;
1504 bb98fe42 Andreas Färber
    uint32_t aSig;
1505 bb98fe42 Andreas Färber
    uint64_t aSig64;
1506 158142c2 bellard
    int64 z;
1507 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1508 158142c2 bellard
1509 158142c2 bellard
    aSig = extractFloat32Frac( a );
1510 158142c2 bellard
    aExp = extractFloat32Exp( a );
1511 158142c2 bellard
    aSign = extractFloat32Sign( a );
1512 158142c2 bellard
    shiftCount = aExp - 0xBE;
1513 158142c2 bellard
    if ( 0 <= shiftCount ) {
1514 f090c9d4 pbrook
        if ( float32_val(a) != 0xDF000000 ) {
1515 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1516 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1517 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
1518 158142c2 bellard
            }
1519 158142c2 bellard
        }
1520 bb98fe42 Andreas Färber
        return (int64_t) LIT64( 0x8000000000000000 );
1521 158142c2 bellard
    }
1522 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1523 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1524 158142c2 bellard
        return 0;
1525 158142c2 bellard
    }
1526 158142c2 bellard
    aSig64 = aSig | 0x00800000;
1527 158142c2 bellard
    aSig64 <<= 40;
1528 158142c2 bellard
    z = aSig64>>( - shiftCount );
1529 bb98fe42 Andreas Färber
    if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1530 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1531 158142c2 bellard
    }
1532 158142c2 bellard
    if ( aSign ) z = - z;
1533 158142c2 bellard
    return z;
1534 158142c2 bellard
1535 158142c2 bellard
}
1536 158142c2 bellard
1537 158142c2 bellard
/*----------------------------------------------------------------------------
1538 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1539 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1540 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1541 158142c2 bellard
| Arithmetic.
1542 158142c2 bellard
*----------------------------------------------------------------------------*/
1543 158142c2 bellard
1544 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
1545 158142c2 bellard
{
1546 158142c2 bellard
    flag aSign;
1547 158142c2 bellard
    int16 aExp;
1548 bb98fe42 Andreas Färber
    uint32_t aSig;
1549 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1550 158142c2 bellard
1551 158142c2 bellard
    aSig = extractFloat32Frac( a );
1552 158142c2 bellard
    aExp = extractFloat32Exp( a );
1553 158142c2 bellard
    aSign = extractFloat32Sign( a );
1554 158142c2 bellard
    if ( aExp == 0xFF ) {
1555 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1556 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
1557 158142c2 bellard
    }
1558 158142c2 bellard
    if ( aExp == 0 ) {
1559 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1560 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1561 158142c2 bellard
        --aExp;
1562 158142c2 bellard
    }
1563 bb98fe42 Andreas Färber
    return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1564 158142c2 bellard
1565 158142c2 bellard
}
1566 158142c2 bellard
1567 158142c2 bellard
/*----------------------------------------------------------------------------
1568 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1569 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1570 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1571 158142c2 bellard
| Arithmetic.
1572 158142c2 bellard
*----------------------------------------------------------------------------*/
1573 158142c2 bellard
1574 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1575 158142c2 bellard
{
1576 158142c2 bellard
    flag aSign;
1577 158142c2 bellard
    int16 aExp;
1578 bb98fe42 Andreas Färber
    uint32_t aSig;
1579 158142c2 bellard
1580 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1581 158142c2 bellard
    aSig = extractFloat32Frac( a );
1582 158142c2 bellard
    aExp = extractFloat32Exp( a );
1583 158142c2 bellard
    aSign = extractFloat32Sign( a );
1584 158142c2 bellard
    if ( aExp == 0xFF ) {
1585 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1586 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1587 158142c2 bellard
    }
1588 158142c2 bellard
    if ( aExp == 0 ) {
1589 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1590 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1591 158142c2 bellard
    }
1592 158142c2 bellard
    aSig |= 0x00800000;
1593 bb98fe42 Andreas Färber
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1594 158142c2 bellard
1595 158142c2 bellard
}
1596 158142c2 bellard
1597 158142c2 bellard
/*----------------------------------------------------------------------------
1598 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1599 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1600 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1601 158142c2 bellard
| Arithmetic.
1602 158142c2 bellard
*----------------------------------------------------------------------------*/
1603 158142c2 bellard
1604 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
1605 158142c2 bellard
{
1606 158142c2 bellard
    flag aSign;
1607 158142c2 bellard
    int16 aExp;
1608 bb98fe42 Andreas Färber
    uint32_t aSig;
1609 158142c2 bellard
1610 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1611 158142c2 bellard
    aSig = extractFloat32Frac( a );
1612 158142c2 bellard
    aExp = extractFloat32Exp( a );
1613 158142c2 bellard
    aSign = extractFloat32Sign( a );
1614 158142c2 bellard
    if ( aExp == 0xFF ) {
1615 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1616 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
1617 158142c2 bellard
    }
1618 158142c2 bellard
    if ( aExp == 0 ) {
1619 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1620 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1621 158142c2 bellard
        --aExp;
1622 158142c2 bellard
    }
1623 bb98fe42 Andreas Färber
    return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1624 158142c2 bellard
1625 158142c2 bellard
}
1626 158142c2 bellard
1627 158142c2 bellard
/*----------------------------------------------------------------------------
1628 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1629 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1630 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1631 158142c2 bellard
| Floating-Point Arithmetic.
1632 158142c2 bellard
*----------------------------------------------------------------------------*/
1633 158142c2 bellard
1634 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
1635 158142c2 bellard
{
1636 158142c2 bellard
    flag aSign;
1637 158142c2 bellard
    int16 aExp;
1638 bb98fe42 Andreas Färber
    uint32_t lastBitMask, roundBitsMask;
1639 158142c2 bellard
    int8 roundingMode;
1640 bb98fe42 Andreas Färber
    uint32_t z;
1641 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1642 158142c2 bellard
1643 158142c2 bellard
    aExp = extractFloat32Exp( a );
1644 158142c2 bellard
    if ( 0x96 <= aExp ) {
1645 158142c2 bellard
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1646 158142c2 bellard
            return propagateFloat32NaN( a, a STATUS_VAR );
1647 158142c2 bellard
        }
1648 158142c2 bellard
        return a;
1649 158142c2 bellard
    }
1650 158142c2 bellard
    if ( aExp <= 0x7E ) {
1651 bb98fe42 Andreas Färber
        if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1652 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1653 158142c2 bellard
        aSign = extractFloat32Sign( a );
1654 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
1655 158142c2 bellard
         case float_round_nearest_even:
1656 158142c2 bellard
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1657 158142c2 bellard
                return packFloat32( aSign, 0x7F, 0 );
1658 158142c2 bellard
            }
1659 158142c2 bellard
            break;
1660 158142c2 bellard
         case float_round_down:
1661 f090c9d4 pbrook
            return make_float32(aSign ? 0xBF800000 : 0);
1662 158142c2 bellard
         case float_round_up:
1663 f090c9d4 pbrook
            return make_float32(aSign ? 0x80000000 : 0x3F800000);
1664 158142c2 bellard
        }
1665 158142c2 bellard
        return packFloat32( aSign, 0, 0 );
1666 158142c2 bellard
    }
1667 158142c2 bellard
    lastBitMask = 1;
1668 158142c2 bellard
    lastBitMask <<= 0x96 - aExp;
1669 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
1670 f090c9d4 pbrook
    z = float32_val(a);
1671 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
1672 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
1673 158142c2 bellard
        z += lastBitMask>>1;
1674 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1675 158142c2 bellard
    }
1676 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
1677 f090c9d4 pbrook
        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1678 158142c2 bellard
            z += roundBitsMask;
1679 158142c2 bellard
        }
1680 158142c2 bellard
    }
1681 158142c2 bellard
    z &= ~ roundBitsMask;
1682 f090c9d4 pbrook
    if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1683 f090c9d4 pbrook
    return make_float32(z);
1684 158142c2 bellard
1685 158142c2 bellard
}
1686 158142c2 bellard
1687 158142c2 bellard
/*----------------------------------------------------------------------------
1688 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1689 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1690 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1691 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1692 158142c2 bellard
| Floating-Point Arithmetic.
1693 158142c2 bellard
*----------------------------------------------------------------------------*/
1694 158142c2 bellard
1695 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1696 158142c2 bellard
{
1697 158142c2 bellard
    int16 aExp, bExp, zExp;
1698 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1699 158142c2 bellard
    int16 expDiff;
1700 158142c2 bellard
1701 158142c2 bellard
    aSig = extractFloat32Frac( a );
1702 158142c2 bellard
    aExp = extractFloat32Exp( a );
1703 158142c2 bellard
    bSig = extractFloat32Frac( b );
1704 158142c2 bellard
    bExp = extractFloat32Exp( b );
1705 158142c2 bellard
    expDiff = aExp - bExp;
1706 158142c2 bellard
    aSig <<= 6;
1707 158142c2 bellard
    bSig <<= 6;
1708 158142c2 bellard
    if ( 0 < expDiff ) {
1709 158142c2 bellard
        if ( aExp == 0xFF ) {
1710 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1711 158142c2 bellard
            return a;
1712 158142c2 bellard
        }
1713 158142c2 bellard
        if ( bExp == 0 ) {
1714 158142c2 bellard
            --expDiff;
1715 158142c2 bellard
        }
1716 158142c2 bellard
        else {
1717 158142c2 bellard
            bSig |= 0x20000000;
1718 158142c2 bellard
        }
1719 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1720 158142c2 bellard
        zExp = aExp;
1721 158142c2 bellard
    }
1722 158142c2 bellard
    else if ( expDiff < 0 ) {
1723 158142c2 bellard
        if ( bExp == 0xFF ) {
1724 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1725 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1726 158142c2 bellard
        }
1727 158142c2 bellard
        if ( aExp == 0 ) {
1728 158142c2 bellard
            ++expDiff;
1729 158142c2 bellard
        }
1730 158142c2 bellard
        else {
1731 158142c2 bellard
            aSig |= 0x20000000;
1732 158142c2 bellard
        }
1733 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1734 158142c2 bellard
        zExp = bExp;
1735 158142c2 bellard
    }
1736 158142c2 bellard
    else {
1737 158142c2 bellard
        if ( aExp == 0xFF ) {
1738 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1739 158142c2 bellard
            return a;
1740 158142c2 bellard
        }
1741 fe76d976 pbrook
        if ( aExp == 0 ) {
1742 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1743 e6afc87f Peter Maydell
                if (aSig | bSig) {
1744 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
1745 e6afc87f Peter Maydell
                }
1746 e6afc87f Peter Maydell
                return packFloat32(zSign, 0, 0);
1747 e6afc87f Peter Maydell
            }
1748 fe76d976 pbrook
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1749 fe76d976 pbrook
        }
1750 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1751 158142c2 bellard
        zExp = aExp;
1752 158142c2 bellard
        goto roundAndPack;
1753 158142c2 bellard
    }
1754 158142c2 bellard
    aSig |= 0x20000000;
1755 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1756 158142c2 bellard
    --zExp;
1757 bb98fe42 Andreas Färber
    if ( (int32_t) zSig < 0 ) {
1758 158142c2 bellard
        zSig = aSig + bSig;
1759 158142c2 bellard
        ++zExp;
1760 158142c2 bellard
    }
1761 158142c2 bellard
 roundAndPack:
1762 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1763 158142c2 bellard
1764 158142c2 bellard
}
1765 158142c2 bellard
1766 158142c2 bellard
/*----------------------------------------------------------------------------
1767 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1768 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1769 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1770 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1771 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1772 158142c2 bellard
*----------------------------------------------------------------------------*/
1773 158142c2 bellard
1774 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1775 158142c2 bellard
{
1776 158142c2 bellard
    int16 aExp, bExp, zExp;
1777 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1778 158142c2 bellard
    int16 expDiff;
1779 158142c2 bellard
1780 158142c2 bellard
    aSig = extractFloat32Frac( a );
1781 158142c2 bellard
    aExp = extractFloat32Exp( a );
1782 158142c2 bellard
    bSig = extractFloat32Frac( b );
1783 158142c2 bellard
    bExp = extractFloat32Exp( b );
1784 158142c2 bellard
    expDiff = aExp - bExp;
1785 158142c2 bellard
    aSig <<= 7;
1786 158142c2 bellard
    bSig <<= 7;
1787 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1788 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1789 158142c2 bellard
    if ( aExp == 0xFF ) {
1790 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1791 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1792 158142c2 bellard
        return float32_default_nan;
1793 158142c2 bellard
    }
1794 158142c2 bellard
    if ( aExp == 0 ) {
1795 158142c2 bellard
        aExp = 1;
1796 158142c2 bellard
        bExp = 1;
1797 158142c2 bellard
    }
1798 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1799 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1800 158142c2 bellard
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1801 158142c2 bellard
 bExpBigger:
1802 158142c2 bellard
    if ( bExp == 0xFF ) {
1803 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1804 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1805 158142c2 bellard
    }
1806 158142c2 bellard
    if ( aExp == 0 ) {
1807 158142c2 bellard
        ++expDiff;
1808 158142c2 bellard
    }
1809 158142c2 bellard
    else {
1810 158142c2 bellard
        aSig |= 0x40000000;
1811 158142c2 bellard
    }
1812 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1813 158142c2 bellard
    bSig |= 0x40000000;
1814 158142c2 bellard
 bBigger:
1815 158142c2 bellard
    zSig = bSig - aSig;
1816 158142c2 bellard
    zExp = bExp;
1817 158142c2 bellard
    zSign ^= 1;
1818 158142c2 bellard
    goto normalizeRoundAndPack;
1819 158142c2 bellard
 aExpBigger:
1820 158142c2 bellard
    if ( aExp == 0xFF ) {
1821 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1822 158142c2 bellard
        return a;
1823 158142c2 bellard
    }
1824 158142c2 bellard
    if ( bExp == 0 ) {
1825 158142c2 bellard
        --expDiff;
1826 158142c2 bellard
    }
1827 158142c2 bellard
    else {
1828 158142c2 bellard
        bSig |= 0x40000000;
1829 158142c2 bellard
    }
1830 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1831 158142c2 bellard
    aSig |= 0x40000000;
1832 158142c2 bellard
 aBigger:
1833 158142c2 bellard
    zSig = aSig - bSig;
1834 158142c2 bellard
    zExp = aExp;
1835 158142c2 bellard
 normalizeRoundAndPack:
1836 158142c2 bellard
    --zExp;
1837 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1838 158142c2 bellard
1839 158142c2 bellard
}
1840 158142c2 bellard
1841 158142c2 bellard
/*----------------------------------------------------------------------------
1842 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1843 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1844 158142c2 bellard
| Binary Floating-Point Arithmetic.
1845 158142c2 bellard
*----------------------------------------------------------------------------*/
1846 158142c2 bellard
1847 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1848 158142c2 bellard
{
1849 158142c2 bellard
    flag aSign, bSign;
1850 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1851 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1852 158142c2 bellard
1853 158142c2 bellard
    aSign = extractFloat32Sign( a );
1854 158142c2 bellard
    bSign = extractFloat32Sign( b );
1855 158142c2 bellard
    if ( aSign == bSign ) {
1856 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1857 158142c2 bellard
    }
1858 158142c2 bellard
    else {
1859 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1860 158142c2 bellard
    }
1861 158142c2 bellard
1862 158142c2 bellard
}
1863 158142c2 bellard
1864 158142c2 bellard
/*----------------------------------------------------------------------------
1865 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1866 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1867 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1868 158142c2 bellard
*----------------------------------------------------------------------------*/
1869 158142c2 bellard
1870 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1871 158142c2 bellard
{
1872 158142c2 bellard
    flag aSign, bSign;
1873 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1874 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1875 158142c2 bellard
1876 158142c2 bellard
    aSign = extractFloat32Sign( a );
1877 158142c2 bellard
    bSign = extractFloat32Sign( b );
1878 158142c2 bellard
    if ( aSign == bSign ) {
1879 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1880 158142c2 bellard
    }
1881 158142c2 bellard
    else {
1882 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1883 158142c2 bellard
    }
1884 158142c2 bellard
1885 158142c2 bellard
}
1886 158142c2 bellard
1887 158142c2 bellard
/*----------------------------------------------------------------------------
1888 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1889 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1890 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1891 158142c2 bellard
*----------------------------------------------------------------------------*/
1892 158142c2 bellard
1893 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1894 158142c2 bellard
{
1895 158142c2 bellard
    flag aSign, bSign, zSign;
1896 158142c2 bellard
    int16 aExp, bExp, zExp;
1897 bb98fe42 Andreas Färber
    uint32_t aSig, bSig;
1898 bb98fe42 Andreas Färber
    uint64_t zSig64;
1899 bb98fe42 Andreas Färber
    uint32_t zSig;
1900 158142c2 bellard
1901 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1902 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1903 37d18660 Peter Maydell
1904 158142c2 bellard
    aSig = extractFloat32Frac( a );
1905 158142c2 bellard
    aExp = extractFloat32Exp( a );
1906 158142c2 bellard
    aSign = extractFloat32Sign( a );
1907 158142c2 bellard
    bSig = extractFloat32Frac( b );
1908 158142c2 bellard
    bExp = extractFloat32Exp( b );
1909 158142c2 bellard
    bSign = extractFloat32Sign( b );
1910 158142c2 bellard
    zSign = aSign ^ bSign;
1911 158142c2 bellard
    if ( aExp == 0xFF ) {
1912 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1913 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1914 158142c2 bellard
        }
1915 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1916 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1917 158142c2 bellard
            return float32_default_nan;
1918 158142c2 bellard
        }
1919 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1920 158142c2 bellard
    }
1921 158142c2 bellard
    if ( bExp == 0xFF ) {
1922 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1923 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1924 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1925 158142c2 bellard
            return float32_default_nan;
1926 158142c2 bellard
        }
1927 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1928 158142c2 bellard
    }
1929 158142c2 bellard
    if ( aExp == 0 ) {
1930 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1931 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1932 158142c2 bellard
    }
1933 158142c2 bellard
    if ( bExp == 0 ) {
1934 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1935 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1936 158142c2 bellard
    }
1937 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1938 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1939 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1940 bb98fe42 Andreas Färber
    shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
1941 158142c2 bellard
    zSig = zSig64;
1942 bb98fe42 Andreas Färber
    if ( 0 <= (int32_t) ( zSig<<1 ) ) {
1943 158142c2 bellard
        zSig <<= 1;
1944 158142c2 bellard
        --zExp;
1945 158142c2 bellard
    }
1946 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1947 158142c2 bellard
1948 158142c2 bellard
}
1949 158142c2 bellard
1950 158142c2 bellard
/*----------------------------------------------------------------------------
1951 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1952 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1953 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1954 158142c2 bellard
*----------------------------------------------------------------------------*/
1955 158142c2 bellard
1956 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1957 158142c2 bellard
{
1958 158142c2 bellard
    flag aSign, bSign, zSign;
1959 158142c2 bellard
    int16 aExp, bExp, zExp;
1960 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1961 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1962 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1963 158142c2 bellard
1964 158142c2 bellard
    aSig = extractFloat32Frac( a );
1965 158142c2 bellard
    aExp = extractFloat32Exp( a );
1966 158142c2 bellard
    aSign = extractFloat32Sign( a );
1967 158142c2 bellard
    bSig = extractFloat32Frac( b );
1968 158142c2 bellard
    bExp = extractFloat32Exp( b );
1969 158142c2 bellard
    bSign = extractFloat32Sign( b );
1970 158142c2 bellard
    zSign = aSign ^ bSign;
1971 158142c2 bellard
    if ( aExp == 0xFF ) {
1972 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1973 158142c2 bellard
        if ( bExp == 0xFF ) {
1974 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1975 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1976 158142c2 bellard
            return float32_default_nan;
1977 158142c2 bellard
        }
1978 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1979 158142c2 bellard
    }
1980 158142c2 bellard
    if ( bExp == 0xFF ) {
1981 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1982 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
1983 158142c2 bellard
    }
1984 158142c2 bellard
    if ( bExp == 0 ) {
1985 158142c2 bellard
        if ( bSig == 0 ) {
1986 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
1987 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
1988 158142c2 bellard
                return float32_default_nan;
1989 158142c2 bellard
            }
1990 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
1991 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1992 158142c2 bellard
        }
1993 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1994 158142c2 bellard
    }
1995 158142c2 bellard
    if ( aExp == 0 ) {
1996 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1997 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1998 158142c2 bellard
    }
1999 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
2000 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
2001 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
2002 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
2003 158142c2 bellard
        aSig >>= 1;
2004 158142c2 bellard
        ++zExp;
2005 158142c2 bellard
    }
2006 bb98fe42 Andreas Färber
    zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2007 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
2008 bb98fe42 Andreas Färber
        zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2009 158142c2 bellard
    }
2010 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2011 158142c2 bellard
2012 158142c2 bellard
}
2013 158142c2 bellard
2014 158142c2 bellard
/*----------------------------------------------------------------------------
2015 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
2016 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2017 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2018 158142c2 bellard
*----------------------------------------------------------------------------*/
2019 158142c2 bellard
2020 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
2021 158142c2 bellard
{
2022 ed086f3d Blue Swirl
    flag aSign, zSign;
2023 158142c2 bellard
    int16 aExp, bExp, expDiff;
2024 bb98fe42 Andreas Färber
    uint32_t aSig, bSig;
2025 bb98fe42 Andreas Färber
    uint32_t q;
2026 bb98fe42 Andreas Färber
    uint64_t aSig64, bSig64, q64;
2027 bb98fe42 Andreas Färber
    uint32_t alternateASig;
2028 bb98fe42 Andreas Färber
    int32_t sigMean;
2029 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2030 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2031 158142c2 bellard
2032 158142c2 bellard
    aSig = extractFloat32Frac( a );
2033 158142c2 bellard
    aExp = extractFloat32Exp( a );
2034 158142c2 bellard
    aSign = extractFloat32Sign( a );
2035 158142c2 bellard
    bSig = extractFloat32Frac( b );
2036 158142c2 bellard
    bExp = extractFloat32Exp( b );
2037 158142c2 bellard
    /* `a' is an infinity or a NaN: propagate any NaN operand, otherwise
       the remainder of an infinity is invalid. */
    if ( aExp == 0xFF ) {
2038 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2039 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
2040 158142c2 bellard
        }
2041 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2042 158142c2 bellard
        return float32_default_nan;
2043 158142c2 bellard
    }
2044 158142c2 bellard
    /* `b' is a NaN (propagated) or an infinity, in which case `a' is
       returned unchanged. */
    if ( bExp == 0xFF ) {
2045 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2046 158142c2 bellard
        return a;
2047 158142c2 bellard
    }
2048 158142c2 bellard
    /* A zero divisor is invalid; a subnormal divisor is normalized. */
    if ( bExp == 0 ) {
2049 158142c2 bellard
        if ( bSig == 0 ) {
2050 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2051 158142c2 bellard
            return float32_default_nan;
2052 158142c2 bellard
        }
2053 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2054 158142c2 bellard
    }
2055 158142c2 bellard
    /* A zero dividend is returned as-is; a subnormal one is normalized. */
    if ( aExp == 0 ) {
2056 158142c2 bellard
        if ( aSig == 0 ) return a;
2057 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2058 158142c2 bellard
    }
2059 158142c2 bellard
    expDiff = aExp - bExp;
2060 158142c2 bellard
    /* Set bit 23 (the leading significand bit) in both operands. */
    aSig |= 0x00800000;
2061 158142c2 bellard
    bSig |= 0x00800000;
2062 158142c2 bellard
    /* Exponent difference below 32: the quotient is formed with a single
       64-by-32-bit division. */
    if ( expDiff < 32 ) {
2063 158142c2 bellard
        aSig <<= 8;
2064 158142c2 bellard
        bSig <<= 8;
2065 158142c2 bellard
        if ( expDiff < 0 ) {
2066 158142c2 bellard
            /* When the exponent of `a' is at least two below that of `b',
               `a' itself is returned. */
            if ( expDiff < -1 ) return a;
2067 158142c2 bellard
            aSig >>= 1;
2068 158142c2 bellard
        }
2069 158142c2 bellard
        /* First quotient bit: 1 if the divisor fits once, in which case it
           is subtracted. */
        q = ( bSig <= aSig );
2070 158142c2 bellard
        if ( q ) aSig -= bSig;
2071 158142c2 bellard
        if ( 0 < expDiff ) {
2072 bb98fe42 Andreas Färber
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2073 158142c2 bellard
            /* Keep only the top expDiff bits of the 32-bit quotient. */
            q >>= 32 - expDiff;
2074 158142c2 bellard
            bSig >>= 2;
2075 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2076 158142c2 bellard
        }
2077 158142c2 bellard
        else {
2078 158142c2 bellard
            aSig >>= 2;
2079 158142c2 bellard
            bSig >>= 2;
2080 158142c2 bellard
        }
2081 158142c2 bellard
    }
2082 158142c2 bellard
    /* Exponent difference of 32 or more: reduce in 64-bit arithmetic,
       lowering expDiff by 62 on each pass of the loop. */
    else {
2083 158142c2 bellard
        if ( bSig <= aSig ) aSig -= bSig;
2084 bb98fe42 Andreas Färber
        aSig64 = ( (uint64_t) aSig )<<40;
2085 bb98fe42 Andreas Färber
        bSig64 = ( (uint64_t) bSig )<<40;
2086 158142c2 bellard
        expDiff -= 64;
2087 158142c2 bellard
        while ( 0 < expDiff ) {
2088 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2089 158142c2 bellard
            /* The estimate is lowered by 2 and clamped at 0 (presumably
               because estimateDiv128To64 may overestimate slightly --
               TODO confirm against its definition). */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2090 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
2091 158142c2 bellard
            expDiff -= 62;
2092 158142c2 bellard
        }
2093 158142c2 bellard
        expDiff += 64;
2094 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2095 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2096 158142c2 bellard
        q = q64>>( 64 - expDiff );
2097 158142c2 bellard
        bSig <<= 6;
2098 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2099 158142c2 bellard
    }
2100 158142c2 bellard
    /* Keep subtracting the divisor (counting in q) until aSig, read as a
       signed 32-bit value, goes negative.  alternateASig remembers the
       value before the final subtraction. */
    do {
2101 158142c2 bellard
        alternateASig = aSig;
2102 158142c2 bellard
        ++q;
2103 158142c2 bellard
        aSig -= bSig;
2104 bb98fe42 Andreas Färber
    } while ( 0 <= (int32_t) aSig );
2105 158142c2 bellard
    /* Choose between the two candidate remainders: a negative sum selects
       alternateASig, and on a zero sum (a tie) alternateASig is selected
       when q is odd. */
    sigMean = aSig + alternateASig;
2106 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2107 158142c2 bellard
        aSig = alternateASig;
2108 158142c2 bellard
    }
2109 bb98fe42 Andreas Färber
    /* A negative remainder is negated, and that flips the result sign
       relative to the sign of `a'. */
    zSign = ( (int32_t) aSig < 0 );
2110 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2111 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2112 158142c2 bellard
2113 158142c2 bellard
}
2114 158142c2 bellard
2115 158142c2 bellard
/*----------------------------------------------------------------------------
2116 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2117 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2118 158142c2 bellard
| Floating-Point Arithmetic.
2119 158142c2 bellard
*----------------------------------------------------------------------------*/
2120 158142c2 bellard
2121 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2122 158142c2 bellard
{
2123 158142c2 bellard
    flag aSign;
2124 158142c2 bellard
    int16 aExp, zExp;
2125 bb98fe42 Andreas Färber
    uint32_t aSig, zSig;
2126 bb98fe42 Andreas Färber
    uint64_t rem, term;
2127 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2128 158142c2 bellard
2129 158142c2 bellard
    aSig = extractFloat32Frac( a );
2130 158142c2 bellard
    aExp = extractFloat32Exp( a );
2131 158142c2 bellard
    aSign = extractFloat32Sign( a );
2132 158142c2 bellard
    if ( aExp == 0xFF ) {
2133 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2134 158142c2 bellard
        if ( ! aSign ) return a;
2135 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2136 158142c2 bellard
        return float32_default_nan;
2137 158142c2 bellard
    }
2138 158142c2 bellard
    if ( aSign ) {
2139 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2140 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2141 158142c2 bellard
        return float32_default_nan;
2142 158142c2 bellard
    }
2143 158142c2 bellard
    if ( aExp == 0 ) {
2144 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2145 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2146 158142c2 bellard
    }
2147 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2148 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2149 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2150 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2151 158142c2 bellard
        if ( zSig < 2 ) {
2152 158142c2 bellard
            zSig = 0x7FFFFFFF;
2153 158142c2 bellard
            goto roundAndPack;
2154 158142c2 bellard
        }
2155 158142c2 bellard
        aSig >>= aExp & 1;
2156 bb98fe42 Andreas Färber
        term = ( (uint64_t) zSig ) * zSig;
2157 bb98fe42 Andreas Färber
        rem = ( ( (uint64_t) aSig )<<32 ) - term;
2158 bb98fe42 Andreas Färber
        while ( (int64_t) rem < 0 ) {
2159 158142c2 bellard
            --zSig;
2160 bb98fe42 Andreas Färber
            rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2161 158142c2 bellard
        }
2162 158142c2 bellard
        zSig |= ( rem != 0 );
2163 158142c2 bellard
    }
2164 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2165 158142c2 bellard
 roundAndPack:
2166 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2167 158142c2 bellard
2168 158142c2 bellard
}
2169 158142c2 bellard
2170 158142c2 bellard
/*----------------------------------------------------------------------------
2171 8229c991 Aurelien Jarno
| Returns the binary exponential of the single-precision floating-point value
2172 8229c991 Aurelien Jarno
| `a'. The operation is performed according to the IEC/IEEE Standard for
2173 8229c991 Aurelien Jarno
| Binary Floating-Point Arithmetic.
2174 8229c991 Aurelien Jarno
|
2175 8229c991 Aurelien Jarno
| Uses the following identities:
2176 8229c991 Aurelien Jarno
|
2177 8229c991 Aurelien Jarno
| 1. -------------------------------------------------------------------------
2178 8229c991 Aurelien Jarno
|      x    x*ln(2)
2179 8229c991 Aurelien Jarno
|     2  = e
2180 8229c991 Aurelien Jarno
|
2181 8229c991 Aurelien Jarno
| 2. -------------------------------------------------------------------------
2182 8229c991 Aurelien Jarno
|                      2     3     4     5           n
2183 8229c991 Aurelien Jarno
|      x        x     x     x     x     x           x
2184 8229c991 Aurelien Jarno
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2185 8229c991 Aurelien Jarno
|               1!    2!    3!    4!    5!          n!
2186 8229c991 Aurelien Jarno
*----------------------------------------------------------------------------*/
2187 8229c991 Aurelien Jarno
2188 8229c991 Aurelien Jarno
static const float64 float32_exp2_coefficients[15] =
2189 8229c991 Aurelien Jarno
{
2190 d5138cf4 Peter Maydell
    const_float64( 0x3ff0000000000000ll ), /*  1 */
2191 d5138cf4 Peter Maydell
    const_float64( 0x3fe0000000000000ll ), /*  2 */
2192 d5138cf4 Peter Maydell
    const_float64( 0x3fc5555555555555ll ), /*  3 */
2193 d5138cf4 Peter Maydell
    const_float64( 0x3fa5555555555555ll ), /*  4 */
2194 d5138cf4 Peter Maydell
    const_float64( 0x3f81111111111111ll ), /*  5 */
2195 d5138cf4 Peter Maydell
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
2196 d5138cf4 Peter Maydell
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
2197 d5138cf4 Peter Maydell
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
2198 d5138cf4 Peter Maydell
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
2199 d5138cf4 Peter Maydell
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2200 d5138cf4 Peter Maydell
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2201 d5138cf4 Peter Maydell
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2202 d5138cf4 Peter Maydell
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
2203 d5138cf4 Peter Maydell
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2204 d5138cf4 Peter Maydell
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2205 8229c991 Aurelien Jarno
};
2206 8229c991 Aurelien Jarno
2207 8229c991 Aurelien Jarno
float32 float32_exp2( float32 a STATUS_PARAM )
2208 8229c991 Aurelien Jarno
{
2209 8229c991 Aurelien Jarno
    flag aSign;
2210 8229c991 Aurelien Jarno
    int16 aExp;
2211 bb98fe42 Andreas Färber
    uint32_t aSig;
2212 8229c991 Aurelien Jarno
    float64 r, x, xn;
2213 8229c991 Aurelien Jarno
    int i;
2214 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2215 8229c991 Aurelien Jarno
2216 8229c991 Aurelien Jarno
    aSig = extractFloat32Frac( a );
2217 8229c991 Aurelien Jarno
    aExp = extractFloat32Exp( a );
2218 8229c991 Aurelien Jarno
    aSign = extractFloat32Sign( a );
2219 8229c991 Aurelien Jarno
2220 8229c991 Aurelien Jarno
    if ( aExp == 0xFF) {
2221 8229c991 Aurelien Jarno
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2222 8229c991 Aurelien Jarno
        return (aSign) ? float32_zero : a;
2223 8229c991 Aurelien Jarno
    }
2224 8229c991 Aurelien Jarno
    if (aExp == 0) {
2225 8229c991 Aurelien Jarno
        if (aSig == 0) return float32_one;
2226 8229c991 Aurelien Jarno
    }
2227 8229c991 Aurelien Jarno
2228 8229c991 Aurelien Jarno
    float_raise( float_flag_inexact STATUS_VAR);
2229 8229c991 Aurelien Jarno
2230 8229c991 Aurelien Jarno
    /* ******************************* */
2231 8229c991 Aurelien Jarno
    /* using float64 for approximation */
2232 8229c991 Aurelien Jarno
    /* ******************************* */
2233 8229c991 Aurelien Jarno
    x = float32_to_float64(a STATUS_VAR);
2234 8229c991 Aurelien Jarno
    x = float64_mul(x, float64_ln2 STATUS_VAR);
2235 8229c991 Aurelien Jarno
2236 8229c991 Aurelien Jarno
    xn = x;
2237 8229c991 Aurelien Jarno
    r = float64_one;
2238 8229c991 Aurelien Jarno
    for (i = 0 ; i < 15 ; i++) {
2239 8229c991 Aurelien Jarno
        float64 f;
2240 8229c991 Aurelien Jarno
2241 8229c991 Aurelien Jarno
        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2242 8229c991 Aurelien Jarno
        r = float64_add(r, f STATUS_VAR);
2243 8229c991 Aurelien Jarno
2244 8229c991 Aurelien Jarno
        xn = float64_mul(xn, x STATUS_VAR);
2245 8229c991 Aurelien Jarno
    }
2246 8229c991 Aurelien Jarno
2247 8229c991 Aurelien Jarno
    return float64_to_float32(r, status);
2248 8229c991 Aurelien Jarno
}
2249 8229c991 Aurelien Jarno
2250 8229c991 Aurelien Jarno
/*----------------------------------------------------------------------------
2251 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2252 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2253 374dfc33 aurel32
| Floating-Point Arithmetic.
2254 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2255 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2256 374dfc33 aurel32
{
2257 374dfc33 aurel32
    flag aSign, zSign;
2258 374dfc33 aurel32
    int16 aExp;
2259 bb98fe42 Andreas Färber
    uint32_t aSig, zSig, i;
2260 374dfc33 aurel32
2261 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2262 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2263 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2264 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2265 374dfc33 aurel32
2266 374dfc33 aurel32
    if ( aExp == 0 ) {
2267 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2268 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2269 374dfc33 aurel32
    }
2270 374dfc33 aurel32
    if ( aSign ) {
2271 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2272 374dfc33 aurel32
        return float32_default_nan;
2273 374dfc33 aurel32
    }
2274 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2275 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2276 374dfc33 aurel32
        return a;
2277 374dfc33 aurel32
    }
2278 374dfc33 aurel32
2279 374dfc33 aurel32
    aExp -= 0x7F;
2280 374dfc33 aurel32
    aSig |= 0x00800000;
2281 374dfc33 aurel32
    zSign = aExp < 0;
2282 374dfc33 aurel32
    zSig = aExp << 23;
2283 374dfc33 aurel32
2284 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2285 bb98fe42 Andreas Färber
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
2286 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2287 374dfc33 aurel32
            aSig >>= 1;
2288 374dfc33 aurel32
            zSig |= i;
2289 374dfc33 aurel32
        }
2290 374dfc33 aurel32
    }
2291 374dfc33 aurel32
2292 374dfc33 aurel32
    if ( zSign )
2293 374dfc33 aurel32
        zSig = -zSig;
2294 374dfc33 aurel32
2295 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2296 374dfc33 aurel32
}
2297 374dfc33 aurel32
2298 374dfc33 aurel32
/*----------------------------------------------------------------------------
2299 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2300 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2301 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2302 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2303 158142c2 bellard
*----------------------------------------------------------------------------*/
2304 158142c2 bellard
2305 b689362d Aurelien Jarno
int float32_eq( float32 a, float32 b STATUS_PARAM )
2306 158142c2 bellard
{
2307 b689362d Aurelien Jarno
    uint32_t av, bv;
2308 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2309 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2310 158142c2 bellard
2311 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2312 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2313 158142c2 bellard
       ) {
2314 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2315 158142c2 bellard
        return 0;
2316 158142c2 bellard
    }
2317 b689362d Aurelien Jarno
    av = float32_val(a);
2318 b689362d Aurelien Jarno
    bv = float32_val(b);
2319 b689362d Aurelien Jarno
    return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2320 158142c2 bellard
}
2321 158142c2 bellard
2322 158142c2 bellard
/*----------------------------------------------------------------------------
2323 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2324 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
2325 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
2326 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2327 158142c2 bellard
*----------------------------------------------------------------------------*/
2328 158142c2 bellard
2329 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2330 158142c2 bellard
{
2331 158142c2 bellard
    flag aSign, bSign;
2332 bb98fe42 Andreas Färber
    uint32_t av, bv;
2333 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2334 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2335 158142c2 bellard
2336 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2337 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2338 158142c2 bellard
       ) {
2339 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2340 158142c2 bellard
        return 0;
2341 158142c2 bellard
    }
2342 158142c2 bellard
    aSign = extractFloat32Sign( a );
2343 158142c2 bellard
    bSign = extractFloat32Sign( b );
2344 f090c9d4 pbrook
    av = float32_val(a);
2345 f090c9d4 pbrook
    bv = float32_val(b);
2346 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2347 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2348 158142c2 bellard
2349 158142c2 bellard
}
2350 158142c2 bellard
2351 158142c2 bellard
/*----------------------------------------------------------------------------
2352 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2353 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2354 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
2355 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2356 158142c2 bellard
*----------------------------------------------------------------------------*/
2357 158142c2 bellard
2358 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2359 158142c2 bellard
{
2360 158142c2 bellard
    flag aSign, bSign;
2361 bb98fe42 Andreas Färber
    uint32_t av, bv;
2362 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2363 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2364 158142c2 bellard
2365 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2366 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2367 158142c2 bellard
       ) {
2368 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2369 158142c2 bellard
        return 0;
2370 158142c2 bellard
    }
2371 158142c2 bellard
    aSign = extractFloat32Sign( a );
2372 158142c2 bellard
    bSign = extractFloat32Sign( b );
2373 f090c9d4 pbrook
    av = float32_val(a);
2374 f090c9d4 pbrook
    bv = float32_val(b);
2375 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2376 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2377 158142c2 bellard
2378 158142c2 bellard
}
2379 158142c2 bellard
2380 158142c2 bellard
/*----------------------------------------------------------------------------
2381 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2382 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
2383 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
2384 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
2385 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2386 67b7861d Aurelien Jarno
2387 67b7861d Aurelien Jarno
int float32_unordered( float32 a, float32 b STATUS_PARAM )
2388 67b7861d Aurelien Jarno
{
2389 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2390 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2391 67b7861d Aurelien Jarno
2392 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2393 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2394 67b7861d Aurelien Jarno
       ) {
2395 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2396 67b7861d Aurelien Jarno
        return 1;
2397 67b7861d Aurelien Jarno
    }
2398 67b7861d Aurelien Jarno
    return 0;
2399 67b7861d Aurelien Jarno
}
2400 b689362d Aurelien Jarno
2401 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2402 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2403 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2404 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
2405 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
2406 158142c2 bellard
*----------------------------------------------------------------------------*/
2407 158142c2 bellard
2408 b689362d Aurelien Jarno
int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
2409 158142c2 bellard
{
2410 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2411 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2412 158142c2 bellard
2413 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2414 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2415 158142c2 bellard
       ) {
2416 b689362d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2417 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2418 b689362d Aurelien Jarno
        }
2419 158142c2 bellard
        return 0;
2420 158142c2 bellard
    }
2421 b689362d Aurelien Jarno
    return ( float32_val(a) == float32_val(b) ) ||
2422 b689362d Aurelien Jarno
            ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2423 158142c2 bellard
}
2424 158142c2 bellard
2425 158142c2 bellard
/*----------------------------------------------------------------------------
2426 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2427 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2428 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2429 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2430 158142c2 bellard
*----------------------------------------------------------------------------*/
2431 158142c2 bellard
2432 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2433 158142c2 bellard
{
2434 158142c2 bellard
    flag aSign, bSign;
2435 bb98fe42 Andreas Färber
    uint32_t av, bv;
2436 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2437 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2438 158142c2 bellard
2439 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2440 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2441 158142c2 bellard
       ) {
2442 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2443 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2444 158142c2 bellard
        }
2445 158142c2 bellard
        return 0;
2446 158142c2 bellard
    }
2447 158142c2 bellard
    aSign = extractFloat32Sign( a );
2448 158142c2 bellard
    bSign = extractFloat32Sign( b );
2449 f090c9d4 pbrook
    av = float32_val(a);
2450 f090c9d4 pbrook
    bv = float32_val(b);
2451 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2452 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2453 158142c2 bellard
2454 158142c2 bellard
}
2455 158142c2 bellard
2456 158142c2 bellard
/*----------------------------------------------------------------------------
2457 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2458 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2459 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2460 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2461 158142c2 bellard
*----------------------------------------------------------------------------*/
2462 158142c2 bellard
2463 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2464 158142c2 bellard
{
2465 158142c2 bellard
    flag aSign, bSign;
2466 bb98fe42 Andreas Färber
    uint32_t av, bv;
2467 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2468 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2469 158142c2 bellard
2470 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2471 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2472 158142c2 bellard
       ) {
2473 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2474 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2475 158142c2 bellard
        }
2476 158142c2 bellard
        return 0;
2477 158142c2 bellard
    }
2478 158142c2 bellard
    aSign = extractFloat32Sign( a );
2479 158142c2 bellard
    bSign = extractFloat32Sign( b );
2480 f090c9d4 pbrook
    av = float32_val(a);
2481 f090c9d4 pbrook
    bv = float32_val(b);
2482 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2483 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2484 158142c2 bellard
2485 158142c2 bellard
}
2486 158142c2 bellard
2487 158142c2 bellard
/*----------------------------------------------------------------------------
2488 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2489 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
2490 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
2491 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
2492 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2493 67b7861d Aurelien Jarno
2494 67b7861d Aurelien Jarno
int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2495 67b7861d Aurelien Jarno
{
2496 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2497 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2498 67b7861d Aurelien Jarno
2499 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2500 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2501 67b7861d Aurelien Jarno
       ) {
2502 67b7861d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2503 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2504 67b7861d Aurelien Jarno
        }
2505 67b7861d Aurelien Jarno
        return 1;
2506 67b7861d Aurelien Jarno
    }
2507 67b7861d Aurelien Jarno
    return 0;
2508 67b7861d Aurelien Jarno
}
2509 67b7861d Aurelien Jarno
2510 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2511 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2512 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2513 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2514 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2515 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2516 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2517 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2518 158142c2 bellard
*----------------------------------------------------------------------------*/
2519 158142c2 bellard
2520 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2521 158142c2 bellard
{
2522 158142c2 bellard
    flag aSign;
2523 158142c2 bellard
    int16 aExp, shiftCount;
2524 bb98fe42 Andreas Färber
    uint64_t aSig;
2525 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2526 158142c2 bellard
2527 158142c2 bellard
    aSig = extractFloat64Frac( a );
2528 158142c2 bellard
    aExp = extractFloat64Exp( a );
2529 158142c2 bellard
    aSign = extractFloat64Sign( a );
2530 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2531 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2532 158142c2 bellard
    shiftCount = 0x42C - aExp;
2533 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2534 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2535 158142c2 bellard
2536 158142c2 bellard
}
2537 158142c2 bellard
2538 158142c2 bellard
/*----------------------------------------------------------------------------
2539 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2540 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2541 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2542 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2543 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2544 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2545 158142c2 bellard
| returned.
2546 158142c2 bellard
*----------------------------------------------------------------------------*/
2547 158142c2 bellard
2548 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2549 158142c2 bellard
{
2550 158142c2 bellard
    flag aSign;
2551 158142c2 bellard
    int16 aExp, shiftCount;
2552 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
2553 158142c2 bellard
    int32 z;
2554 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
2555 158142c2 bellard
2556 158142c2 bellard
    aSig = extractFloat64Frac( a );
2557 158142c2 bellard
    aExp = extractFloat64Exp( a );
2558 158142c2 bellard
    aSign = extractFloat64Sign( a );
2559 158142c2 bellard
    if ( 0x41E < aExp ) {
2560 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2561 158142c2 bellard
        goto invalid;
2562 158142c2 bellard
    }
2563 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2564 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2565 158142c2 bellard
        return 0;
2566 158142c2 bellard
    }
2567 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2568 158142c2 bellard
    shiftCount = 0x433 - aExp;
2569 158142c2 bellard
    savedASig = aSig;
2570 158142c2 bellard
    aSig >>= shiftCount;
2571 158142c2 bellard
    z = aSig;
2572 158142c2 bellard
    if ( aSign ) z = - z;
2573 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2574 158142c2 bellard
 invalid:
2575 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2576 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2577 158142c2 bellard
    }
2578 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2579 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2580 158142c2 bellard
    }
2581 158142c2 bellard
    return z;
2582 158142c2 bellard
2583 158142c2 bellard
}
2584 158142c2 bellard
2585 158142c2 bellard
/*----------------------------------------------------------------------------
2586 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2587 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
2588 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2589 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
2590 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2591 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
2592 cbcef455 Peter Maydell
| returned.
2593 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
2594 cbcef455 Peter Maydell
2595 cbcef455 Peter Maydell
int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32 z;
    /* Squash denormal inputs the same way float64_to_int64(),
     * float64_to_int64_round_to_zero() and float64_to_float32() do.  Without
     * this call a denormal input falls straight into the `aExp < 0x3FF'
     * path below and is reported as inexact. */
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( 0x40E < aExp ) {
        /* Unbiased exponent of 16 or more (including infinities and NaNs):
         * the value cannot fit.  A NaN is forced positive so that the
         * largest positive integer is returned for it. */
        if ( ( aExp == 0x7FF ) && aSig ) {
            aSign = 0;
        }
        goto invalid;
    }
    else if ( aExp < 0x3FF ) {
        /* Magnitude below 1 truncates to 0; only a true zero is exact. */
        if ( aExp || aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* Make the implicit leading one explicit. */
    aSig |= LIT64( 0x0010000000000000 );
    /* Number of significand bits that lie below the binary point. */
    shiftCount = 0x433 - aExp;
    savedASig = aSig;
    aSig >>= shiftCount;
    z = aSig;
    if ( aSign ) {
        z = - z;
    }
    /* If narrowing to 16 bits flips the sign, the value is out of range. */
    if ( ( (int16_t)z < 0 ) ^ aSign ) {
 invalid:
        float_raise( float_flag_invalid STATUS_VAR);
        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
    }
    /* Shifting back recovers the original only if no fraction bits were
     * discarded by the truncation. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;
}
2635 cbcef455 Peter Maydell
2636 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
2637 cbcef455 Peter Maydell
| Returns the result of converting the double-precision floating-point value
2638 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2639 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2640 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2641 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2642 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2643 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2644 158142c2 bellard
*----------------------------------------------------------------------------*/
2645 158142c2 bellard
2646 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp, rightShift;
    uint64_t sig, sigExtra;

    a = float64_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    sig = extractFloat64Frac(a);
    if (exp != 0) {
        /* Non-zero exponent field: make the implicit leading one explicit. */
        sig |= LIT64(0x0010000000000000);
    }

    if (0x43E < exp) {
        /* Unbiased exponent of 64 or more, an infinity, or a NaN. */
        float_raise(float_flag_invalid STATUS_VAR);
        /* Only a negative non-NaN input saturates to the most negative
         * integer; positive inputs and NaNs of either sign give the
         * largest positive one. */
        if (sign && !((exp == 0x7FF) && (sig != LIT64(0x0010000000000000)))) {
            return (int64_t) LIT64(0x8000000000000000);
        }
        return LIT64(0x7FFFFFFFFFFFFFFF);
    }

    /* Count of significand bits lying below the binary point; zero or
     * negative when the significand has to be scaled up instead. */
    rightShift = 0x433 - exp;
    if (rightShift > 0) {
        shift64ExtraRightJamming(sig, 0, rightShift, &sig, &sigExtra);
    } else {
        sigExtra = 0;
        sig <<= -rightShift;
    }
    return roundAndPackInt64(sign, sig, sigExtra STATUS_VAR);
}
2678 158142c2 bellard
2679 158142c2 bellard
/*----------------------------------------------------------------------------
2680 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2681 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2682 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2683 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2684 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2685 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2686 158142c2 bellard
| returned.
2687 158142c2 bellard
*----------------------------------------------------------------------------*/
2688 158142c2 bellard
2689 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp, leftShift;
    uint64_t sig;
    int64 result;

    a = float64_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    sig = extractFloat64Frac(a);
    if (exp != 0) {
        /* Non-zero exponent field: make the implicit leading one explicit. */
        sig |= LIT64(0x0010000000000000);
    }

    if (0x43E <= exp) {
        /* Unbiased exponent of 63 or more, an infinity, or a NaN.  The bit
         * pattern 0xC3E0000000000000 (-2^63) is the one input in this range
         * that still converts without raising invalid. */
        if (float64_val(a) != LIT64(0xC3E0000000000000)) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!sign) {
                return LIT64(0x7FFFFFFFFFFFFFFF);
            }
            if ((exp == 0x7FF) && (sig != LIT64(0x0010000000000000))) {
                /* NaN: largest positive integer regardless of sign. */
                return LIT64(0x7FFFFFFFFFFFFFFF);
            }
        }
        return (int64_t) LIT64(0x8000000000000000);
    }

    if (exp < 0x3FE) {
        /* Magnitude below one half: result is 0, inexact unless a is zero. */
        if ((exp != 0) || (sig != 0)) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    leftShift = exp - 0x433;
    if (0 <= leftShift) {
        result = sig << leftShift;
    } else {
        result = sig >> (-leftShift);
        /* Any bit that the right shift pushed out makes the result inexact. */
        if ((uint64_t)(sig << (leftShift & 63))) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
    }
    return sign ? -result : result;
}
2731 158142c2 bellard
2732 158142c2 bellard
/*----------------------------------------------------------------------------
2733 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2734 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2735 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2736 158142c2 bellard
| Arithmetic.
2737 158142c2 bellard
*----------------------------------------------------------------------------*/
2738 158142c2 bellard
2739 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac64;
    uint32_t sig32;

    a = float64_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    frac64 = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (frac64 != 0) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloat32(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        /* Infinity keeps its sign. */
        return packFloat32(sign, 0xFF, 0);
    }

    /* Move the fraction down by 22 bits so that it fits in 32 bits. */
    shift64RightJamming(frac64, 22, &frac64);
    sig32 = frac64;
    if ((exp != 0) || (sig32 != 0)) {
        /* Anything but a zero: set bit 30 and rebias the exponent for
         * roundAndPackFloat32(). */
        sig32 |= 0x40000000;
        exp -= 0x381;
    }
    return roundAndPackFloat32(sign, exp, sig32 STATUS_VAR);
}
2763 158142c2 bellard
2764 60011498 Paul Brook
2765 60011498 Paul Brook
/*----------------------------------------------------------------------------
2766 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2767 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2768 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2769 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2770 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2771 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2772 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2773 60011498 Paul Brook
| significand.
2774 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2775 bb98fe42 Andreas Färber
static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
{
    /* Position each field, then add (not OR) them so that an integer bit in
     * zSig carries into the exponent field. */
    uint32_t signField = ((uint32_t)zSign) << 15;
    uint32_t expField = ((uint32_t)zExp) << 10;

    return make_float16(signField + expField + zSig);
}
2780 60011498 Paul Brook
2781 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2782 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2783 bb4d4bb3 Peter Maydell
2784 bb4d4bb3 Peter Maydell
float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
{
    flag sign = extractFloat16Sign(a);
    int16 exp = extractFloat16Exp(a);
    uint32_t frac = extractFloat16Frac(a);

    /* The all-ones exponent encodes Inf/NaN only in the IEEE format. */
    if (ieee && exp == 0x1f) {
        if (frac != 0) {
            return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        return packFloat32(sign, 0xff, 0);
    }

    if (exp == 0) {
        int8 normShift;

        if (frac == 0) {
            /* Signed zero. */
            return packFloat32(sign, 0, 0);
        }

        /* Subnormal: shift the leading one up to bit 10 and compensate in
         * the exponent. */
        normShift = countLeadingZeros32(frac) - 21;
        frac <<= normShift;
        exp = -normShift;
    }

    /* Rebias the exponent and widen the fraction by 13 bits. */
    return packFloat32(sign, exp + 0x70, frac << 13);
}
2813 60011498 Paul Brook
2814 bb4d4bb3 Peter Maydell
float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
2815 60011498 Paul Brook
{
2816 60011498 Paul Brook
    flag aSign;
2817 60011498 Paul Brook
    int16 aExp;
2818 bb98fe42 Andreas Färber
    uint32_t aSig;
2819 bb98fe42 Andreas Färber
    uint32_t mask;
2820 bb98fe42 Andreas Färber
    uint32_t increment;
2821 60011498 Paul Brook
    int8 roundingMode;
2822 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2823 60011498 Paul Brook
2824 60011498 Paul Brook
    aSig = extractFloat32Frac( a );
2825 60011498 Paul Brook
    aExp = extractFloat32Exp( a );
2826 60011498 Paul Brook
    aSign = extractFloat32Sign( a );
2827 60011498 Paul Brook
    if ( aExp == 0xFF ) {
2828 60011498 Paul Brook
        if (aSig) {
2829 600e30d2 Peter Maydell
            /* Input is a NaN */
2830 600e30d2 Peter Maydell
            float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2831 600e30d2 Peter Maydell
            if (!ieee) {
2832 600e30d2 Peter Maydell
                return packFloat16(aSign, 0, 0);
2833 600e30d2 Peter Maydell
            }
2834 600e30d2 Peter Maydell
            return r;
2835 60011498 Paul Brook
        }
2836 600e30d2 Peter Maydell
        /* Infinity */
2837 600e30d2 Peter Maydell
        if (!ieee) {
2838 600e30d2 Peter Maydell
            float_raise(float_flag_invalid STATUS_VAR);
2839 600e30d2 Peter Maydell
            return packFloat16(aSign, 0x1f, 0x3ff);
2840 600e30d2 Peter Maydell
        }
2841 600e30d2 Peter Maydell
        return packFloat16(aSign, 0x1f, 0);
2842 60011498 Paul Brook
    }
2843 600e30d2 Peter Maydell
    if (aExp == 0 && aSig == 0) {
2844 60011498 Paul Brook
        return packFloat16(aSign, 0, 0);
2845 60011498 Paul Brook
    }
2846 60011498 Paul Brook
    /* Decimal point between bits 22 and 23.  */
2847 60011498 Paul Brook
    aSig |= 0x00800000;
2848 60011498 Paul Brook
    aExp -= 0x7f;
2849 60011498 Paul Brook
    if (aExp < -14) {
2850 600e30d2 Peter Maydell
        mask = 0x00ffffff;
2851 600e30d2 Peter Maydell
        if (aExp >= -24) {
2852 600e30d2 Peter Maydell
            mask >>= 25 + aExp;
2853 60011498 Paul Brook
        }
2854 60011498 Paul Brook
    } else {
2855 60011498 Paul Brook
        mask = 0x00001fff;
2856 60011498 Paul Brook
    }
2857 60011498 Paul Brook
    if (aSig & mask) {
2858 60011498 Paul Brook
        float_raise( float_flag_underflow STATUS_VAR );
2859 60011498 Paul Brook
        roundingMode = STATUS(float_rounding_mode);
2860 60011498 Paul Brook
        switch (roundingMode) {
2861 60011498 Paul Brook
        case float_round_nearest_even:
2862 60011498 Paul Brook
            increment = (mask + 1) >> 1;
2863 60011498 Paul Brook
            if ((aSig & mask) == increment) {
2864 60011498 Paul Brook
                increment = aSig & (increment << 1);
2865 60011498 Paul Brook
            }
2866 60011498 Paul Brook
            break;
2867 60011498 Paul Brook
        case float_round_up:
2868 60011498 Paul Brook
            increment = aSign ? 0 : mask;
2869 60011498 Paul Brook
            break;
2870 60011498 Paul Brook
        case float_round_down:
2871 60011498 Paul Brook
            increment = aSign ? mask : 0;
2872 60011498 Paul Brook
            break;
2873 60011498 Paul Brook
        default: /* round_to_zero */
2874 60011498 Paul Brook
            increment = 0;
2875 60011498 Paul Brook
            break;
2876 60011498 Paul Brook
        }
2877 60011498 Paul Brook
        aSig += increment;
2878 60011498 Paul Brook
        if (aSig >= 0x01000000) {
2879 60011498 Paul Brook
            aSig >>= 1;
2880 60011498 Paul Brook
            aExp++;
2881 60011498 Paul Brook
        }
2882 60011498 Paul Brook
    } else if (aExp < -14
2883 60011498 Paul Brook
          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
2884 60011498 Paul Brook
        float_raise( float_flag_underflow STATUS_VAR);
2885 60011498 Paul Brook
    }
2886 60011498 Paul Brook
2887 60011498 Paul Brook
    if (ieee) {
2888 60011498 Paul Brook
        if (aExp > 15) {
2889 60011498 Paul Brook
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
2890 60011498 Paul Brook
            return packFloat16(aSign, 0x1f, 0);
2891 60011498 Paul Brook
        }
2892 60011498 Paul Brook
    } else {
2893 60011498 Paul Brook
        if (aExp > 16) {
2894 600e30d2 Peter Maydell
            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
2895 60011498 Paul Brook
            return packFloat16(aSign, 0x1f, 0x3ff);
2896 60011498 Paul Brook
        }
2897 60011498 Paul Brook
    }
2898 60011498 Paul Brook
    if (aExp < -24) {
2899 60011498 Paul Brook
        return packFloat16(aSign, 0, 0);
2900 60011498 Paul Brook
    }
2901 60011498 Paul Brook
    if (aExp < -14) {
2902 60011498 Paul Brook
        aSig >>= -14 - aExp;
2903 60011498 Paul Brook
        aExp = -14;
2904 60011498 Paul Brook
    }
2905 60011498 Paul Brook
    return packFloat16(aSign, aExp + 14, aSig >> 13);
2906 60011498 Paul Brook
}
2907 60011498 Paul Brook
2908 158142c2 bellard
/*----------------------------------------------------------------------------
2909 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2910 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2911 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2912 158142c2 bellard
| Arithmetic.
2913 158142c2 bellard
*----------------------------------------------------------------------------*/
2914 158142c2 bellard
2915 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac;

    a = float64_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    frac = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (frac != 0) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloatx80(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        /* Infinity: maximum exponent with only the top significand bit set. */
        return packFloatx80(sign, 0x7FFF, LIT64(0x8000000000000000));
    }
    if (exp == 0) {
        if (frac == 0) {
            /* Signed zero. */
            return packFloatx80(sign, 0, 0);
        }
        normalizeFloat64Subnormal(frac, &exp, &frac);
    }
    /* Set the explicit integer bit, then move the 53-bit significand to the
     * top of the 64-bit field and rebias the exponent. */
    frac |= LIT64(0x0010000000000000);
    return packFloatx80(sign, exp + 0x3C00, frac << 11);
}
2938 158142c2 bellard
2939 158142c2 bellard
/*----------------------------------------------------------------------------
2940 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2941 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2942 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2943 158142c2 bellard
| Arithmetic.
2944 158142c2 bellard
*----------------------------------------------------------------------------*/
2945 158142c2 bellard
2946 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac, sigHi, sigLo;

    a = float64_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    frac = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (frac != 0) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloat128(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        /* Infinity keeps its sign. */
        return packFloat128(sign, 0x7FFF, 0, 0);
    }
    if (exp == 0) {
        if (frac == 0) {
            /* Signed zero. */
            return packFloat128(sign, 0, 0, 0);
        }
        normalizeFloat64Subnormal(frac, &exp, &frac);
        exp -= 1;
    }
    /* Spread the significand across the two 64-bit halves (4-bit right
     * shift of the 128-bit value frac:0) and rebias the exponent. */
    shift128Right(frac, 0, 4, &sigHi, &sigLo);
    return packFloat128(sign, exp + 0x3C00, sigHi, sigLo);
}
2969 158142c2 bellard
2970 158142c2 bellard
/*----------------------------------------------------------------------------
2971 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2972 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2973 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2974 158142c2 bellard
| Floating-Point Arithmetic.
2975 158142c2 bellard
*----------------------------------------------------------------------------*/
2976 158142c2 bellard
2977 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    int8 mode;
    uint64_t unitBit, fractionMask, bits;

    a = float64_squash_input_denormal(a STATUS_VAR);
    exp = extractFloat64Exp(a);

    /* Exponent of 52 or more: no fraction bits remain, so the value is
     * returned as-is unless it is a NaN, which goes through propagation. */
    if (0x433 <= exp) {
        if ((exp == 0x7FF) && extractFloat64Frac(a)) {
            return propagateFloat64NaN(a, a STATUS_VAR);
        }
        return a;
    }

    /* Magnitude below 1: the result is 0 or +/-1, depending on the mode. */
    if (exp < 0x3FF) {
        if ((uint64_t)(float64_val(a) << 1) == 0) {
            /* +0 or -0 is already an integer. */
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat64Sign(a);
        mode = STATUS(float_rounding_mode);
        if (mode == float_round_nearest_even) {
            /* Strictly more than one half rounds away from zero to 1. */
            if ((exp == 0x3FE) && extractFloat64Frac(a)) {
                return packFloat64(sign, 0x3FF, 0);
            }
        } else if (mode == float_round_down) {
            if (sign) {
                return make_float64(LIT64(0xBFF0000000000000));
            }
            return make_float64(0);
        } else if (mode == float_round_up) {
            if (sign) {
                return make_float64(LIT64(0x8000000000000000));
            }
            return make_float64(LIT64(0x3FF0000000000000));
        }
        /* Everything else collapses to a zero of the same sign. */
        return packFloat64(sign, 0, 0);
    }

    /* General case: operate directly on the raw bit pattern. */
    unitBit = (uint64_t)1 << (0x433 - exp);   /* bit with integer weight 1 */
    fractionMask = unitBit - 1;               /* all bits below it */
    bits = float64_val(a);
    mode = STATUS(float_rounding_mode);
    switch (mode) {
    case float_round_nearest_even:
        bits += unitBit >> 1;
        if ((bits & fractionMask) == 0) {
            /* Exact tie: clear the unit bit to land on the even neighbour. */
            bits &= ~unitBit;
        }
        break;
    case float_round_to_zero:
        break;
    default:
        /* Directed rounding: bump the magnitude when rounding points away
         * from zero for this sign. */
        if (extractFloat64Sign(make_float64(bits)) ^ (mode == float_round_up)) {
            bits += fractionMask;
        }
        break;
    }
    bits &= ~fractionMask;
    if (bits != float64_val(a)) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float64(bits);
}
3031 158142c2 bellard
3032 e6e5906b pbrook
float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    /* Round to an integral value with the rounding mode temporarily forced
     * to round-to-zero; the caller's mode is put back before returning. */
    const int savedMode = STATUS(float_rounding_mode);
    float64 truncated;

    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;

    return truncated;
}
3042 e6e5906b pbrook
3043 158142c2 bellard
/*----------------------------------------------------------------------------
3044 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
3045 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
3046 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
3047 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3048 158142c2 bellard
| Floating-Point Arithmetic.
3049 158142c2 bellard
*----------------------------------------------------------------------------*/
3050 158142c2 bellard
3051 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
{
    int16 expA, expB, expZ, shift;
    uint64_t sigA, sigB, sum;

    /* Fractions are kept shifted left by 9 bits while they are aligned. */
    sigA = extractFloat64Frac(a) << 9;
    expA = extractFloat64Exp(a);
    sigB = extractFloat64Frac(b) << 9;
    expB = extractFloat64Exp(b);
    shift = expA - expB;

    if (shift == 0) {
        /* Equal exponents: no alignment needed. */
        if (expA == 0x7FF) {
            if (sigA | sigB) {
                return propagateFloat64NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expA == 0) {
            /* Both operands are zero or subnormal. */
            if (STATUS(flush_to_zero)) {
                if (sigA | sigB) {
                    float_raise(float_flag_output_denormal STATUS_VAR);
                }
                return packFloat64(zSign, 0, 0);
            }
            return packFloat64(zSign, 0, (sigA + sigB) >> 9);
        }
        /* 0x4000... accounts for both implicit leading ones at once. */
        return roundAndPackFloat64(zSign, expA,
                                   LIT64(0x4000000000000000) + sigA + sigB
                                   STATUS_VAR);
    }

    if (0 < shift) {
        /* a has the larger exponent; align b to it. */
        if (expA == 0x7FF) {
            if (sigA) {
                return propagateFloat64NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            /* Subnormal b: no implicit bit, one less position to shift. */
            --shift;
        } else {
            sigB |= LIT64(0x2000000000000000);
        }
        shift64RightJamming(sigB, shift, &sigB);
        expZ = expA;
    } else {
        /* b has the larger exponent; align a to it. */
        if (expB == 0x7FF) {
            if (sigB) {
                return propagateFloat64NaN(a, b STATUS_VAR);
            }
            return packFloat64(zSign, 0x7FF, 0);
        }
        if (expA == 0) {
            /* Subnormal a: no implicit bit, one less position to shift. */
            ++shift;
        } else {
            sigA |= LIT64(0x2000000000000000);
        }
        shift64RightJamming(sigA, -shift, &sigA);
        expZ = expB;
    }

    /* Add the implicit leading one of the larger-exponent operand (the
     * addition is commutative, so it is always folded into sigA). */
    sigA |= LIT64(0x2000000000000000);
    sum = sigA + sigB;
    if ((int64_t)(sum << 1) < 0) {
        /* Doubling would reach bit 63: use the sum unshifted with the
         * larger exponent. */
        return roundAndPackFloat64(zSign, expZ, sum STATUS_VAR);
    }
    /* Otherwise pass the doubled sum with the exponent lowered by one. */
    return roundAndPackFloat64(zSign, expZ - 1, sum << 1 STATUS_VAR);
}
3121 158142c2 bellard
3122 158142c2 bellard
/*----------------------------------------------------------------------------
3123 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
3124 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
3125 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3126 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3127 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3128 158142c2 bellard
*----------------------------------------------------------------------------*/
3129 158142c2 bellard
3130 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3131 158142c2 bellard
{
3132 158142c2 bellard
    int16 aExp, bExp, zExp;
3133 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3134 158142c2 bellard
    int16 expDiff;
3135 158142c2 bellard
3136 158142c2 bellard
    aSig = extractFloat64Frac( a );
3137 158142c2 bellard
    aExp = extractFloat64Exp( a );
3138 158142c2 bellard
    bSig = extractFloat64Frac( b );
3139 158142c2 bellard
    bExp = extractFloat64Exp( b );
3140 158142c2 bellard
    expDiff = aExp - bExp;
    /* Move both fractions up by 10 bits so the low bits are free to hold
       what the alignment shift below pushes out. */
3141 158142c2 bellard
    aSig <<= 10;
3142 158142c2 bellard
    bSig <<= 10;
    /* Dispatch on which operand has the larger exponent field; fall through
       when the two exponent fields are equal. */
3143 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3144 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponent fields of 0x7FF: a nonzero fraction on either side is a
       NaN; otherwise both fractions are zero (Inf - Inf), which is invalid. */
3145 158142c2 bellard
    if ( aExp == 0x7FF ) {
3146 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3147 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3148 158142c2 bellard
        return float64_default_nan;
3149 158142c2 bellard
    }
    /* Both exponent fields are zero here (they are equal): use exponent 1
       for both. */
3150 158142c2 bellard
    if ( aExp == 0 ) {
3151 158142c2 bellard
        aExp = 1;
3152 158142c2 bellard
        bExp = 1;
3153 158142c2 bellard
    }
    /* With equal exponents the jumps land after the lines that OR in
       bit 62, so the fractions are subtracted without it. */
3154 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3155 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: return a zero whose sign bit is set only when the
       rounding mode is float_round_down. */
3156 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3157 158142c2 bellard
 bExpBigger:
    /* b has the larger exponent.  With exponent 0x7FF, b is a NaN (nonzero
       fraction) or an infinity, in which case the result is an infinity with
       the sign zSign flipped. */
3158 158142c2 bellard
    if ( bExp == 0x7FF ) {
3159 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3160 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
3161 158142c2 bellard
    }
    /* expDiff is negative here.  If a's exponent field is zero, the shift
       distance is reduced by one and bit 62 is not set for a; otherwise
       bit 62 (just above the shifted fraction) is set. */
3162 158142c2 bellard
    if ( aExp == 0 ) {
3163 158142c2 bellard
        ++expDiff;
3164 158142c2 bellard
    }
3165 158142c2 bellard
    else {
3166 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
3167 158142c2 bellard
    }
    /* Align a to b's exponent.  shift64RightJamming is defined elsewhere;
       presumably it keeps a sticky low bit for bits shifted out -- confirm. */
3168 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
3169 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
3170 158142c2 bellard
 bBigger:
    /* |b| > |a|: compute b - a and flip the result sign. */
3171 158142c2 bellard
    zSig = bSig - aSig;
3172 158142c2 bellard
    zExp = bExp;
3173 158142c2 bellard
    zSign ^= 1;
3174 158142c2 bellard
    goto normalizeRoundAndPack;
3175 158142c2 bellard
 aExpBigger:
    /* Mirror image of bExpBigger.  With exponent 0x7FF, a is a NaN (nonzero
       fraction) or an infinity, which is returned unchanged. */
3176 158142c2 bellard
    if ( aExp == 0x7FF ) {
3177 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3178 158142c2 bellard
        return a;
3179 158142c2 bellard
    }
3180 158142c2 bellard
    if ( bExp == 0 ) {
3181 158142c2 bellard
        --expDiff;
3182 158142c2 bellard
    }
3183 158142c2 bellard
    else {
3184 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
3185 158142c2 bellard
    }
3186 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
3187 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
3188 158142c2 bellard
 aBigger:
    /* |a| > |b|: compute a - b; the result keeps the sign zSign. */
3189 158142c2 bellard
    zSig = aSig - bSig;
3190 158142c2 bellard
    zExp = aExp;
3191 158142c2 bellard
 normalizeRoundAndPack:
    /* Common exit for both subtraction directions: the exponent is lowered
       by one before the difference is handed to
       normalizeRoundAndPackFloat64. */
3192 158142c2 bellard
    --zExp;
3193 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3194 158142c2 bellard
3195 158142c2 bellard
}
3196 158142c2 bellard
3197 158142c2 bellard
/*----------------------------------------------------------------------------
3198 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
3199 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
3200 158142c2 bellard
| Binary Floating-Point Arithmetic.
3201 158142c2 bellard
*----------------------------------------------------------------------------*/
3202 158142c2 bellard
3203 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;

    /* Both operands go through float64_squash_input_denormal (defined
       elsewhere) before anything else looks at them. */
    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );

    /* Operands of opposite sign: the sum is a difference of magnitudes. */
    if ( signA != signB ) {
        return subFloat64Sigs( a, b, signA STATUS_VAR );
    }
    /* Same sign: add the magnitudes and keep that sign. */
    return addFloat64Sigs( a, b, signA STATUS_VAR );
}
3219 158142c2 bellard
3220 158142c2 bellard
/*----------------------------------------------------------------------------
3221 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
3222 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3223 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3224 158142c2 bellard
*----------------------------------------------------------------------------*/
3225 158142c2 bellard
3226 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;

    /* Both operands go through float64_squash_input_denormal (defined
       elsewhere) before anything else looks at them. */
    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );

    /* Operands of opposite sign: the difference is a sum of magnitudes. */
    if ( signA != signB ) {
        return addFloat64Sigs( a, b, signA STATUS_VAR );
    }
    /* Same sign: subtract the magnitudes. */
    return subFloat64Sigs( a, b, signA STATUS_VAR );
}
3242 158142c2 bellard
3243 158142c2 bellard
/*----------------------------------------------------------------------------
3244 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
3245 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3246 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3247 158142c2 bellard
*----------------------------------------------------------------------------*/
3248 158142c2 bellard
3249 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3250 158142c2 bellard
{
3251 158142c2 bellard
    flag aSign, bSign, zSign;
3252 158142c2 bellard
    int16 aExp, bExp, zExp;
3253 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
3254 158142c2 bellard
3255 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3256 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3257 37d18660 Peter Maydell
3258 158142c2 bellard
    aSig = extractFloat64Frac( a );
3259 158142c2 bellard
    aExp = extractFloat64Exp( a );
3260 158142c2 bellard
    aSign = extractFloat64Sign( a );
3261 158142c2 bellard
    bSig = extractFloat64Frac( b );
3262 158142c2 bellard
    bExp = extractFloat64Exp( b );
3263 158142c2 bellard
    bSign = extractFloat64Sign( b );
    /* The sign of the product is the XOR of the operand signs. */
3264 158142c2 bellard
    zSign = aSign ^ bSign;
    /* a is a NaN or an infinity.  Any NaN operand is propagated; Inf * 0
       raises invalid and returns the default NaN; otherwise the result is a
       signed infinity. */
3265 158142c2 bellard
    if ( aExp == 0x7FF ) {
3266 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3267 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3268 158142c2 bellard
        }
3269 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
3270 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3271 158142c2 bellard
            return float64_default_nan;
3272 158142c2 bellard
        }
3273 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3274 158142c2 bellard
    }
    /* Same treatment when b is a NaN or an infinity (a is finite here). */
3275 158142c2 bellard
    if ( bExp == 0x7FF ) {
3276 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3277 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3278 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3279 158142c2 bellard
            return float64_default_nan;
3280 158142c2 bellard
        }
3281 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3282 158142c2 bellard
    }
    /* A zero operand gives a signed zero; an operand with a zero exponent
       field but nonzero fraction is passed to normalizeFloat64Subnormal,
       which rewrites its exponent and significand. */
3283 158142c2 bellard
    if ( aExp == 0 ) {
3284 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3285 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3286 158142c2 bellard
    }
3287 158142c2 bellard
    if ( bExp == 0 ) {
3288 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3289 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3290 158142c2 bellard
    }
    /* Adding the two biased exponents counts the bias twice; remove one. */
3291 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
    /* Set bit 52 on each significand, then position a's top bit at bit 62
       and b's at bit 63 before the 64x64 multiply. */
3292 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3293 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3294 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* Only zSig0 is carried forward; a nonzero zSig1 is recorded as a
       sticky bit in bit 0 of zSig0. */
3295 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
    /* If bit 62 of zSig0 is clear, shift left by one and compensate in the
       exponent. */
3296 bb98fe42 Andreas Färber
    if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
3297 158142c2 bellard
        zSig0 <<= 1;
3298 158142c2 bellard
        --zExp;
3299 158142c2 bellard
    }
3300 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3301 158142c2 bellard
3302 158142c2 bellard
}
3303 158142c2 bellard
3304 158142c2 bellard
/*----------------------------------------------------------------------------
3305 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
3306 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
3307 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3308 158142c2 bellard
*----------------------------------------------------------------------------*/
3309 158142c2 bellard
3310 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
3311 158142c2 bellard
{
3312 158142c2 bellard
    flag aSign, bSign, zSign;
3313 158142c2 bellard
    int16 aExp, bExp, zExp;
3314 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3315 bb98fe42 Andreas Färber
    uint64_t rem0, rem1;
3316 bb98fe42 Andreas Färber
    uint64_t term0, term1;
3317 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3318 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3319 158142c2 bellard
3320 158142c2 bellard
    aSig = extractFloat64Frac( a );
3321 158142c2 bellard
    aExp = extractFloat64Exp( a );
3322 158142c2 bellard
    aSign = extractFloat64Sign( a );
3323 158142c2 bellard
    bSig = extractFloat64Frac( b );
3324 158142c2 bellard
    bExp = extractFloat64Exp( b );
3325 158142c2 bellard
    bSign = extractFloat64Sign( b );
    /* The sign of the quotient is the XOR of the operand signs. */
3326 158142c2 bellard
    zSign = aSign ^ bSign;
    /* a is a NaN or an infinity.  NaNs are propagated; Inf / Inf raises
       invalid and returns the default NaN; Inf / finite is a signed
       infinity. */
3327 158142c2 bellard
    if ( aExp == 0x7FF ) {
3328 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3329 158142c2 bellard
        if ( bExp == 0x7FF ) {
3330 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3331 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3332 158142c2 bellard
            return float64_default_nan;
3333 158142c2 bellard
        }
3334 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3335 158142c2 bellard
    }
    /* b is a NaN (propagated) or an infinity: finite / Inf is a signed
       zero. */
3336 158142c2 bellard
    if ( bExp == 0x7FF ) {
3337 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3338 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
3339 158142c2 bellard
    }
    /* Zero divisor: 0 / 0 raises invalid and returns the default NaN; any
       other dividend raises divbyzero and returns a signed infinity.
       A divisor with zero exponent field but nonzero fraction is passed to
       normalizeFloat64Subnormal. */
3340 158142c2 bellard
    if ( bExp == 0 ) {
3341 158142c2 bellard
        if ( bSig == 0 ) {
3342 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3343 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3344 158142c2 bellard
                return float64_default_nan;
3345 158142c2 bellard
            }
3346 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3347 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3348 158142c2 bellard
        }
3349 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3350 158142c2 bellard
    }
    /* Zero dividend (divisor nonzero here) gives a signed zero. */
3351 158142c2 bellard
    if ( aExp == 0 ) {
3352 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3353 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3354 158142c2 bellard
    }
3355 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
    /* Set bit 52 on each significand, then position a's top bit at bit 62
       and b's at bit 63. */
3356 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3357 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    /* When b <= 2a, halve the dividend and bump the exponent so that the
       128-bit dividend (aSig, 0) has a high word smaller than bSig. */
3358 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
3359 158142c2 bellard
        aSig >>= 1;
3360 158142c2 bellard
        ++zExp;
3361 158142c2 bellard
    }
    /* estimateDiv128To64 is defined elsewhere; the correction below only
       ever decrements zSig, so the estimate is presumably never too small
       -- confirm against its definition. */
3362 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
    /* Only when the low 9 bits of the estimate are <= 2 is the exact
       remainder computed: step zSig down while the remainder is negative,
       then record a nonzero rem1 as a sticky bit in bit 0 of zSig. */
3363 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
3364 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
3365 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3366 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3367 158142c2 bellard
            --zSig;
3368 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3369 158142c2 bellard
        }
3370 158142c2 bellard
        zSig |= ( rem1 != 0 );
3371 158142c2 bellard
    }
3372 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3373 158142c2 bellard
3374 158142c2 bellard
}
3375 158142c2 bellard
3376 158142c2 bellard
/*----------------------------------------------------------------------------
3377 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3378 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3379 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3380 158142c2 bellard
*----------------------------------------------------------------------------*/
3381 158142c2 bellard
3382 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3383 158142c2 bellard
{
3384 ed086f3d Blue Swirl
    flag aSign, zSign;
3385 158142c2 bellard
    int16 aExp, bExp, expDiff;
3386 bb98fe42 Andreas Färber
    uint64_t aSig, bSig;
3387 bb98fe42 Andreas Färber
    uint64_t q, alternateASig;
3388 bb98fe42 Andreas Färber
    int64_t sigMean;
3389 158142c2 bellard
3390 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3391 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3392 158142c2 bellard
    aSig = extractFloat64Frac( a );
3393 158142c2 bellard
    aExp = extractFloat64Exp( a );
3394 158142c2 bellard
    aSign = extractFloat64Sign( a );
3395 158142c2 bellard
    bSig = extractFloat64Frac( b );
3396 158142c2 bellard
    bExp = extractFloat64Exp( b );
    /* a is a NaN or an infinity.  Any NaN operand is propagated; an
       infinite dividend raises invalid and returns the default NaN. */
3397 158142c2 bellard
    if ( aExp == 0x7FF ) {
3398 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3399 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3400 158142c2 bellard
        }
3401 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3402 158142c2 bellard
        return float64_default_nan;
3403 158142c2 bellard
    }
    /* b is a NaN (propagated) or an infinity: a finite a is returned
       unchanged. */
3404 158142c2 bellard
    if ( bExp == 0x7FF ) {
3405 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3406 158142c2 bellard
        return a;
3407 158142c2 bellard
    }
    /* A zero divisor raises invalid and returns the default NaN. */
3408 158142c2 bellard
    if ( bExp == 0 ) {
3409 158142c2 bellard
        if ( bSig == 0 ) {
3410 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3411 158142c2 bellard
            return float64_default_nan;
3412 158142c2 bellard
        }
3413 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3414 158142c2 bellard
    }
    /* A zero dividend is returned unchanged. */
3415 158142c2 bellard
    if ( aExp == 0 ) {
3416 158142c2 bellard
        if ( aSig == 0 ) return a;
3417 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3418 158142c2 bellard
    }
3419 158142c2 bellard
    expDiff = aExp - bExp;
    /* Set bit 52 on each significand and move it to bit 63. */
3420 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3421 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    /* a's exponent at least two below b's means |a| < |b|/2, so a is
       returned as the remainder.  One below: halve aSig to line the two
       significands up. */
3422 158142c2 bellard
    if ( expDiff < 0 ) {
3423 158142c2 bellard
        if ( expDiff < -1 ) return a;
3424 158142c2 bellard
        aSig >>= 1;
3425 158142c2 bellard
    }
    /* First quotient bit: subtract bSig once if it fits. */
3426 158142c2 bellard
    q = ( bSig <= aSig );
3427 158142c2 bellard
    if ( q ) aSig -= bSig;
    /* Consume the exponent difference 62 bits per iteration while more than
       64 bits remain.  The quotient estimate is lowered by 2 (clamped at 0);
       estimateDiv128To64 is defined elsewhere, so the margin presumably
       covers its error bound -- confirm. */
3428 158142c2 bellard
    expDiff -= 64;
3429 158142c2 bellard
    while ( 0 < expDiff ) {
3430 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3431 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3432 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
3433 158142c2 bellard
        expDiff -= 62;
3434 158142c2 bellard
    }
    /* Undo the bias of 64; a positive expDiff is the number of quotient
       bits still to be produced, a non-positive one means none remain. */
3435 158142c2 bellard
    expDiff += 64;
3436 158142c2 bellard
    if ( 0 < expDiff ) {
3437 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3438 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
        /* Keep only the top expDiff bits of the 64-bit estimate. */
3439 158142c2 bellard
        q >>= 64 - expDiff;
3440 158142c2 bellard
        bSig >>= 2;
3441 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3442 158142c2 bellard
    }
3443 158142c2 bellard
    else {
3444 158142c2 bellard
        aSig >>= 2;
3445 158142c2 bellard
        bSig >>= 2;
3446 158142c2 bellard
    }
    /* Keep subtracting bSig until aSig goes negative (as a signed value).
       alternateASig holds the last non-negative remainder and q counts the
       subtractions on top of the running quotient. */
3447 158142c2 bellard
    do {
3448 158142c2 bellard
        alternateASig = aSig;
3449 158142c2 bellard
        ++q;
3450 158142c2 bellard
        aSig -= bSig;
3451 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig );
    /* Choose between the negative remainder (aSig) and the non-negative one
       (alternateASig): take whichever has the smaller magnitude, and on an
       exact tie let the low bit of q decide. */
3452 158142c2 bellard
    sigMean = aSig + alternateASig;
3453 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3454 158142c2 bellard
        aSig = alternateASig;
3455 158142c2 bellard
    }
    /* Convert the chosen remainder to sign + magnitude; the result sign is
       a's sign flipped when the remainder is negative. */
3456 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig < 0 );
3457 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3458 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3459 158142c2 bellard
3460 158142c2 bellard
}
3461 158142c2 bellard
3462 158142c2 bellard
/*----------------------------------------------------------------------------
3463 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3464 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3465 158142c2 bellard
| Floating-Point Arithmetic.
3466 158142c2 bellard
*----------------------------------------------------------------------------*/
3467 158142c2 bellard
3468 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3469 158142c2 bellard
{
3470 158142c2 bellard
    flag aSign;
3471 158142c2 bellard
    int16 aExp, zExp;
3472 bb98fe42 Andreas Färber
    uint64_t aSig, zSig, doubleZSig;
3473 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, term0, term1;
3474 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3475 158142c2 bellard
3476 158142c2 bellard
    aSig = extractFloat64Frac( a );
3477 158142c2 bellard
    aExp = extractFloat64Exp( a );
3478 158142c2 bellard
    aSign = extractFloat64Sign( a );
    /* NaN is propagated; +Inf is returned unchanged; -Inf raises invalid
       and returns the default NaN. */
3479 158142c2 bellard
    if ( aExp == 0x7FF ) {
3480 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3481 158142c2 bellard
        if ( ! aSign ) return a;
3482 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3483 158142c2 bellard
        return float64_default_nan;
3484 158142c2 bellard
    }
    /* Negative input: -0 is returned unchanged, anything else raises
       invalid and returns the default NaN. */
3485 158142c2 bellard
    if ( aSign ) {
3486 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3487 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3488 158142c2 bellard
        return float64_default_nan;
3489 158142c2 bellard
    }
    /* +0 returns float64_zero; an input with zero exponent field but
       nonzero fraction is passed to normalizeFloat64Subnormal. */
3490 158142c2 bellard
    if ( aExp == 0 ) {
3491 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3492 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3493 158142c2 bellard
    }
    /* Halve the unbiased exponent and re-bias. */
3494 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3495 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
    /* estimateSqrt32 (defined elsewhere) is given the exponent and the top
       32 bits of the 53-bit significand. */
3496 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
    /* The left shift is 8 when aExp is odd and 9 when it is even. */
3497 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
    /* Combine the first estimate with a division step to get a 64-bit
       estimate. */
3498 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
    /* Only when the low 9 bits of the estimate are <= 5 is the exact
       remainder aSig*2^64 - zSig^2 computed and zSig stepped down until
       that remainder is non-negative. */
3499 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3500 158142c2 bellard
        doubleZSig = zSig<<1;
3501 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3502 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3503 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3504 158142c2 bellard
            --zSig;
3505 158142c2 bellard
            doubleZSig -= 2;
            /* Decreasing z by one raises the remainder by 2*z + 1 (with z
               the new value): zSig>>63 is the high word of 2*z and
               doubleZSig | 1 is its low word plus one. */
3506 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3507 158142c2 bellard
        }
        /* A nonzero remainder is recorded as a sticky bit in bit 0. */
3508 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3509 158142c2 bellard
    }
    /* The result is packed with a zero sign bit. */
3510 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3511 158142c2 bellard
3512 158142c2 bellard
}
3513 158142c2 bellard
3514 158142c2 bellard
/*----------------------------------------------------------------------------
3515 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3516 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3517 374dfc33 aurel32
| Floating-Point Arithmetic.
3518 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3519 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
{
    flag aSign, zSign;
    int16 aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* NaNs are handled before the sign test so that a NaN whose sign bit is
       set is propagated like any other NaN instead of being reported as an
       invalid negative operand. */
    if ( ( aExp == 0x7FF ) && aSig ) {
        return propagateFloat64NaN( a, float64_zero STATUS_VAR );
    }
    if ( aExp == 0 ) {
        /* log2 of +0 or -0 is -Inf.
           NOTE(review): IEEE 754-2008 also specifies a divide-by-zero
           exception here; no flag is raised, as before -- confirm whether
           any target depends on that before changing it. */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Any remaining negative operand (including -Inf) is invalid. */
    if ( aSign ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }
    /* Only +Inf is left with an all-ones exponent: log2(+Inf) = +Inf. */
    if ( aExp == 0x7FF ) {
        return a;
    }

    /* The unbiased exponent is the integer part of the result; it is placed
       above a 52-bit fraction field in zSig. */
    aExp -= 0x3FF;
    aSig |= LIT64( 0x0010000000000000 );
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* One fraction bit per iteration, most significant first: square the
       significand (kept with its leading one at bit 52); if the square
       reaches bit 53, halve it and set the current result bit. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* zSig holds a two's-complement fixed-point value; convert it to
       sign + magnitude before packing. */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
}
3560 374dfc33 aurel32
3561 374dfc33 aurel32
/*----------------------------------------------------------------------------
3562 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3563 b689362d Aurelien Jarno
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3564 b689362d Aurelien Jarno
| if either operand is a NaN.  Otherwise, the comparison is performed
3565 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3566 158142c2 bellard
*----------------------------------------------------------------------------*/
3567 158142c2 bellard
3568 b689362d Aurelien Jarno
int float64_eq( float64 a, float64 b STATUS_PARAM )
{
    uint64_t rawA, rawB;
    int aIsNaN, bIsNaN;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has exponent field 0x7FF and a nonzero fraction. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Any NaN operand raises invalid and compares as not equal. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }

    rawA = float64_val(a);
    rawB = float64_val(b);
    if ( rawA == rawB ) {
        return 1;
    }
    /* With the sign bits shifted out, two zeros of opposite sign still
       compare equal. */
    return (uint64_t) ( ( rawA | rawB )<<1 ) == 0;
}
3585 158142c2 bellard
3586 158142c2 bellard
/*----------------------------------------------------------------------------
3587 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3588 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  The invalid
3589 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
3590 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3591 158142c2 bellard
*----------------------------------------------------------------------------*/
3592 158142c2 bellard
3593 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t rawA, rawB;
    int aIsNaN, bIsNaN;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has exponent field 0x7FF and a nonzero fraction. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Any NaN operand raises invalid; the comparison is false. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    rawA = float64_val(a);
    rawB = float64_val(b);

    if ( signA != signB ) {
        /* Opposite signs: true when a is the negative operand, or when both
           operands are zeros. */
        if ( signA ) {
            return 1;
        }
        return (uint64_t) ( ( rawA | rawB )<<1 ) == 0;
    }
    if ( rawA == rawB ) {
        return 1;
    }
    /* Same sign: the raw bit patterns order like the magnitudes, and the
       order is reversed for negative values. */
    return ( signA ^ ( rawA < rawB ) ) != 0;
}
3614 158142c2 bellard
3615 158142c2 bellard
/*----------------------------------------------------------------------------
3616 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3617 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
3618 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
3619 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3620 158142c2 bellard
*----------------------------------------------------------------------------*/
3621 158142c2 bellard
3622 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t rawA, rawB;
    int aIsNaN, bIsNaN;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has exponent field 0x7FF and a nonzero fraction. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Any NaN operand raises invalid; the comparison is false. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    rawA = float64_val(a);
    rawB = float64_val(b);

    if ( signA != signB ) {
        /* Opposite signs: true only when a is the negative operand and the
           operands are not both zeros. */
        if ( ! signA ) {
            return 0;
        }
        return (uint64_t) ( ( rawA | rawB )<<1 ) != 0;
    }
    if ( rawA == rawB ) {
        return 0;
    }
    /* Same sign: the raw bit patterns order like the magnitudes, and the
       order is reversed for negative values. */
    return ( signA ^ ( rawA < rawB ) ) != 0;
}
3643 158142c2 bellard
3644 158142c2 bellard
/*----------------------------------------------------------------------------
3645 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3646 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
3647 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
3648 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
3649 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3650 67b7861d Aurelien Jarno
3651 67b7861d Aurelien Jarno
int float64_unordered( float64 a, float64 b STATUS_PARAM )
{
    int aIsNaN, bIsNaN;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* A NaN has exponent field 0x7FF and a nonzero fraction. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );

    /* Neither operand is a NaN: the pair is ordered. */
    if ( ! aIsNaN && ! bIsNaN ) {
        return 0;
    }
    /* At least one NaN: raise invalid and report the pair as unordered. */
    float_raise( float_flag_invalid STATUS_VAR);
    return 1;
}
3664 67b7861d Aurelien Jarno
3665 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3666 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3667 f5a64251 Aurelien Jarno
| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3668 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
3669 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
3670 158142c2 bellard
*----------------------------------------------------------------------------*/
3671 158142c2 bellard
3672 b689362d Aurelien Jarno
int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
3673 158142c2 bellard
{
3674 bb98fe42 Andreas Färber
    uint64_t av, bv;
3675 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3676 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3677 158142c2 bellard
3678 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3679 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3680 158142c2 bellard
       ) {
3681 b689362d Aurelien Jarno
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3682 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
3683 b689362d Aurelien Jarno
        }
3684 158142c2 bellard
        return 0;
3685 158142c2 bellard
    }
3686 f090c9d4 pbrook
    av = float64_val(a);
3687 a1b91bb4 pbrook
    bv = float64_val(b);
3688 bb98fe42 Andreas Färber
    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3689 158142c2 bellard
3690 158142c2 bellard
}
3691 158142c2 bellard
3692 158142c2 bellard
/*----------------------------------------------------------------------------
3693 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3694 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3695 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3696 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3697 158142c2 bellard
*----------------------------------------------------------------------------*/
3698 158142c2 bellard
3699 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3700 158142c2 bellard
{
3701 158142c2 bellard
    flag aSign, bSign;
3702 bb98fe42 Andreas Färber
    uint64_t av, bv;
3703 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3704 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3705 158142c2 bellard
3706 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3707 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3708 158142c2 bellard
       ) {
3709 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3710 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3711 158142c2 bellard
        }
3712 158142c2 bellard
        return 0;
3713 158142c2 bellard
    }
3714 158142c2 bellard
    aSign = extractFloat64Sign( a );
3715 158142c2 bellard
    bSign = extractFloat64Sign( b );
3716 f090c9d4 pbrook
    av = float64_val(a);
3717 a1b91bb4 pbrook
    bv = float64_val(b);
3718 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
3719 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3720 158142c2 bellard
3721 158142c2 bellard
}
3722 158142c2 bellard
3723 158142c2 bellard
/*----------------------------------------------------------------------------
3724 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3725 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3726 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3727 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3728 158142c2 bellard
*----------------------------------------------------------------------------*/
3729 158142c2 bellard
3730 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3731 158142c2 bellard
{
3732 158142c2 bellard
    flag aSign, bSign;
3733 bb98fe42 Andreas Färber
    uint64_t av, bv;
3734 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3735 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3736 158142c2 bellard
3737 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3738 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3739 158142c2 bellard
       ) {
3740 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3741 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3742 158142c2 bellard
        }
3743 158142c2 bellard
        return 0;
3744 158142c2 bellard
    }
3745 158142c2 bellard
    aSign = extractFloat64Sign( a );
3746 158142c2 bellard
    bSign = extractFloat64Sign( b );
3747 f090c9d4 pbrook
    av = float64_val(a);
3748 a1b91bb4 pbrook
    bv = float64_val(b);
3749 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
3750 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3751 158142c2 bellard
3752 158142c2 bellard
}
3753 158142c2 bellard
3754 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3755 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3756 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
3757 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
3758 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
3759 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3760 67b7861d Aurelien Jarno
3761 67b7861d Aurelien Jarno
int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
3762 67b7861d Aurelien Jarno
{
3763 67b7861d Aurelien Jarno
    a = float64_squash_input_denormal(a STATUS_VAR);
3764 67b7861d Aurelien Jarno
    b = float64_squash_input_denormal(b STATUS_VAR);
3765 67b7861d Aurelien Jarno
3766 67b7861d Aurelien Jarno
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3767 67b7861d Aurelien Jarno
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3768 67b7861d Aurelien Jarno
       ) {
3769 67b7861d Aurelien Jarno
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3770 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
3771 67b7861d Aurelien Jarno
        }
3772 67b7861d Aurelien Jarno
        return 1;
3773 67b7861d Aurelien Jarno
    }
3774 67b7861d Aurelien Jarno
    return 0;
3775 67b7861d Aurelien Jarno
}
3776 67b7861d Aurelien Jarno
3777 158142c2 bellard
/*----------------------------------------------------------------------------
3778 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3779 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3780 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3781 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3782 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3783 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3784 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3785 158142c2 bellard
*----------------------------------------------------------------------------*/
3786 158142c2 bellard
3787 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
3788 158142c2 bellard
{
3789 158142c2 bellard
    flag aSign;
3790 158142c2 bellard
    int32 aExp, shiftCount;
3791 bb98fe42 Andreas Färber
    uint64_t aSig;
3792 158142c2 bellard
3793 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3794 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3795 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3796 bb98fe42 Andreas Färber
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
3797 158142c2 bellard
    shiftCount = 0x4037 - aExp;
3798 158142c2 bellard
    if ( shiftCount <= 0 ) shiftCount = 1;
3799 158142c2 bellard
    shift64RightJamming( aSig, shiftCount, &aSig );
3800 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
3801 158142c2 bellard
3802 158142c2 bellard
}
3803 158142c2 bellard
3804 158142c2 bellard
/*----------------------------------------------------------------------------
3805 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3806 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3807 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3808 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3809 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3810 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3811 158142c2 bellard
| sign as `a' is returned.
3812 158142c2 bellard
*----------------------------------------------------------------------------*/
3813 158142c2 bellard
3814 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
3815 158142c2 bellard
{
3816 158142c2 bellard
    flag aSign;
3817 158142c2 bellard
    int32 aExp, shiftCount;
3818 bb98fe42 Andreas Färber
    uint64_t aSig, savedASig;
3819 158142c2 bellard
    int32 z;
3820 158142c2 bellard
3821 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3822 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3823 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3824 158142c2 bellard
    if ( 0x401E < aExp ) {
3825 bb98fe42 Andreas Färber
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
3826 158142c2 bellard
        goto invalid;
3827 158142c2 bellard
    }
3828 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3829 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3830 158142c2 bellard
        return 0;
3831 158142c2 bellard
    }
3832 158142c2 bellard
    shiftCount = 0x403E - aExp;
3833 158142c2 bellard
    savedASig = aSig;
3834 158142c2 bellard
    aSig >>= shiftCount;
3835 158142c2 bellard
    z = aSig;
3836 158142c2 bellard
    if ( aSign ) z = - z;
3837 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
3838 158142c2 bellard
 invalid:
3839 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3840 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3841 158142c2 bellard
    }
3842 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
3843 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3844 158142c2 bellard
    }
3845 158142c2 bellard
    return z;
3846 158142c2 bellard
3847 158142c2 bellard
}
3848 158142c2 bellard
3849 158142c2 bellard
/*----------------------------------------------------------------------------
3850 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3851 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3852 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3853 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3854 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3855 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3856 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3857 158142c2 bellard
*----------------------------------------------------------------------------*/
3858 158142c2 bellard
3859 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
3860 158142c2 bellard
{
3861 158142c2 bellard
    flag aSign;
3862 158142c2 bellard
    int32 aExp, shiftCount;
3863 bb98fe42 Andreas Färber
    uint64_t aSig, aSigExtra;
3864 158142c2 bellard
3865 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3866 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3867 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3868 158142c2 bellard
    shiftCount = 0x403E - aExp;
3869 158142c2 bellard
    if ( shiftCount <= 0 ) {
3870 158142c2 bellard
        if ( shiftCount ) {
3871 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3872 158142c2 bellard
            if (    ! aSign
3873 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
3874 158142c2 bellard
                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3875 158142c2 bellard
               ) {
3876 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3877 158142c2 bellard
            }
3878 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
3879 158142c2 bellard
        }
3880 158142c2 bellard
        aSigExtra = 0;
3881 158142c2 bellard
    }
3882 158142c2 bellard
    else {
3883 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3884 158142c2 bellard
    }
3885 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3886 158142c2 bellard
3887 158142c2 bellard
}
3888 158142c2 bellard
3889 158142c2 bellard
/*----------------------------------------------------------------------------
3890 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3891 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3892 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3893 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3894 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3895 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3896 158142c2 bellard
| sign as `a' is returned.
3897 158142c2 bellard
*----------------------------------------------------------------------------*/
3898 158142c2 bellard
3899 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
3900 158142c2 bellard
{
3901 158142c2 bellard
    flag aSign;
3902 158142c2 bellard
    int32 aExp, shiftCount;
3903 bb98fe42 Andreas Färber
    uint64_t aSig;
3904 158142c2 bellard
    int64 z;
3905 158142c2 bellard
3906 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3907 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3908 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3909 158142c2 bellard
    shiftCount = aExp - 0x403E;
3910 158142c2 bellard
    if ( 0 <= shiftCount ) {
3911 158142c2 bellard
        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3912 158142c2 bellard
        if ( ( a.high != 0xC03E ) || aSig ) {
3913 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3914 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3915 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3916 158142c2 bellard
            }
3917 158142c2 bellard
        }
3918 bb98fe42 Andreas Färber
        return (int64_t) LIT64( 0x8000000000000000 );
3919 158142c2 bellard
    }
3920 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3921 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3922 158142c2 bellard
        return 0;
3923 158142c2 bellard
    }
3924 158142c2 bellard
    z = aSig>>( - shiftCount );
3925 bb98fe42 Andreas Färber
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3926 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3927 158142c2 bellard
    }
3928 158142c2 bellard
    if ( aSign ) z = - z;
3929 158142c2 bellard
    return z;
3930 158142c2 bellard
3931 158142c2 bellard
}
3932 158142c2 bellard
3933 158142c2 bellard
/*----------------------------------------------------------------------------
3934 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3935 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3936 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3937 158142c2 bellard
| Floating-Point Arithmetic.
3938 158142c2 bellard
*----------------------------------------------------------------------------*/
3939 158142c2 bellard
3940 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
3941 158142c2 bellard
{
3942 158142c2 bellard
    flag aSign;
3943 158142c2 bellard
    int32 aExp;
3944 bb98fe42 Andreas Färber
    uint64_t aSig;
3945 158142c2 bellard
3946 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3947 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3948 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3949 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3950 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) {
3951 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3952 158142c2 bellard
        }
3953 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
3954 158142c2 bellard
    }
3955 158142c2 bellard
    shift64RightJamming( aSig, 33, &aSig );
3956 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3F81;
3957 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
3958 158142c2 bellard
3959 158142c2 bellard
}
3960 158142c2 bellard
3961 158142c2 bellard
/*----------------------------------------------------------------------------
3962 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3963 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3964 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3965 158142c2 bellard
| Floating-Point Arithmetic.
3966 158142c2 bellard
*----------------------------------------------------------------------------*/
3967 158142c2 bellard
3968 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
3969 158142c2 bellard
{
3970 158142c2 bellard
    flag aSign;
3971 158142c2 bellard
    int32 aExp;
3972 bb98fe42 Andreas Färber
    uint64_t aSig, zSig;
3973 158142c2 bellard
3974 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3975 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3976 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3977 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3978 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) {
3979 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3980 158142c2 bellard
        }
3981 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
3982 158142c2 bellard
    }
3983 158142c2 bellard
    shift64RightJamming( aSig, 1, &zSig );
3984 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3C01;
3985 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
3986 158142c2 bellard
3987 158142c2 bellard
}
3988 158142c2 bellard
3989 158142c2 bellard
/*----------------------------------------------------------------------------
3990 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3991 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3992 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3993 158142c2 bellard
| Floating-Point Arithmetic.
3994 158142c2 bellard
*----------------------------------------------------------------------------*/
3995 158142c2 bellard
3996 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
3997 158142c2 bellard
{
3998 158142c2 bellard
    flag aSign;
3999 158142c2 bellard
    int16 aExp;
4000 bb98fe42 Andreas Färber
    uint64_t aSig, zSig0, zSig1;
4001 158142c2 bellard
4002 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4003 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4004 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4005 bb98fe42 Andreas Färber
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
4006 bcd4d9af Christophe Lyon
        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
4007 158142c2 bellard
    }
4008 158142c2 bellard
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4009 158142c2 bellard
    return packFloat128( aSign, aExp, zSig0, zSig1 );
4010 158142c2 bellard
4011 158142c2 bellard
}
4012 158142c2 bellard
4013 158142c2 bellard
/*----------------------------------------------------------------------------
4014 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
4015 158142c2 bellard
| and returns the result as an extended quadruple-precision floating-point
4016 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
4017 158142c2 bellard
| Binary Floating-Point Arithmetic.
4018 158142c2 bellard
*----------------------------------------------------------------------------*/
4019 158142c2 bellard
4020 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
4021 158142c2 bellard
{
4022 158142c2 bellard
    flag aSign;
4023 158142c2 bellard
    int32 aExp;
4024 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
4025 158142c2 bellard
    int8 roundingMode;
4026 158142c2 bellard
    floatx80 z;
4027 158142c2 bellard
4028 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4029 158142c2 bellard
    if ( 0x403E <= aExp ) {
4030 bb98fe42 Andreas Färber
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
4031 158142c2 bellard
            return propagateFloatx80NaN( a, a STATUS_VAR );
4032 158142c2 bellard
        }
4033 158142c2 bellard
        return a;
4034 158142c2 bellard
    }
4035 158142c2 bellard
    if ( aExp < 0x3FFF ) {
4036 158142c2 bellard
        if (    ( aExp == 0 )
4037 bb98fe42 Andreas Färber
             && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
4038 158142c2 bellard
            return a;
4039 158142c2 bellard
        }
4040 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4041 158142c2 bellard
        aSign = extractFloatx80Sign( a );
4042 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
4043 158142c2 bellard
         case float_round_nearest_even:
4044 bb98fe42 Andreas Färber
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
4045 158142c2 bellard
               ) {
4046 158142c2 bellard
                return
4047 158142c2 bellard
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4048 158142c2 bellard
            }
4049 158142c2 bellard
            break;
4050 158142c2 bellard
         case float_round_down:
4051 158142c2 bellard
            return
4052 158142c2 bellard
                  aSign ?
4053 158142c2 bellard
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4054 158142c2 bellard
                : packFloatx80( 0, 0, 0 );
4055 158142c2 bellard
         case float_round_up:
4056 158142c2 bellard
            return
4057 158142c2 bellard
                  aSign ? packFloatx80( 1, 0, 0 )
4058 158142c2 bellard
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4059 158142c2 bellard
        }
4060 158142c2 bellard
        return packFloatx80( aSign, 0, 0 );
4061 158142c2 bellard
    }
4062 158142c2 bellard
    lastBitMask = 1;
4063 158142c2 bellard
    lastBitMask <<= 0x403E - aExp;
4064 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
4065 158142c2 bellard
    z = a;
4066 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
4067 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
4068 158142c2 bellard
        z.low += lastBitMask>>1;
4069 158142c2 bellard
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4070 158142c2 bellard
    }
4071 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
4072 158142c2 bellard
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
4073 158142c2 bellard
            z.low += roundBitsMask;
4074 158142c2 bellard
        }
4075 158142c2 bellard
    }
4076 158142c2 bellard
    z.low &= ~ roundBitsMask;
4077 158142c2 bellard
    if ( z.low == 0 ) {
4078 158142c2 bellard
        ++z.high;
4079 158142c2 bellard
        z.low = LIT64( 0x8000000000000000 );
4080 158142c2 bellard
    }
4081 158142c2 bellard
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
4082 158142c2 bellard
    return z;
4083 158142c2 bellard
4084 158142c2 bellard
}
4085 158142c2 bellard
4086 158142c2 bellard
/*----------------------------------------------------------------------------
4087 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
4088 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
4089 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
4090 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4091 158142c2 bellard
| Floating-Point Arithmetic.
4092 158142c2 bellard
*----------------------------------------------------------------------------*/
4093 158142c2 bellard
4094 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
4095 158142c2 bellard
{
4096 158142c2 bellard
    int32 aExp, bExp, zExp;
4097 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4098 158142c2 bellard
    int32 expDiff;
4099 158142c2 bellard
4100 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4101 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4102 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4103 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4104 158142c2 bellard
    expDiff = aExp - bExp;
4105 158142c2 bellard
    if ( 0 < expDiff ) {
4106 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4107 bb98fe42 Andreas Färber
            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4108 158142c2 bellard
            return a;
4109 158142c2 bellard
        }
4110 158142c2 bellard
        if ( bExp == 0 ) --expDiff;
4111 158142c2 bellard
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4112 158142c2 bellard
        zExp = aExp;
4113 158142c2 bellard
    }
4114 158142c2 bellard
    else if ( expDiff < 0 ) {
4115 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4116 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4117 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4118 158142c2 bellard
        }
4119 158142c2 bellard
        if ( aExp == 0 ) ++expDiff;
4120 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4121 158142c2 bellard
        zExp = bExp;
4122 158142c2 bellard
    }
4123 158142c2 bellard
    else {
4124 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4125 bb98fe42 Andreas Färber
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
4126 158142c2 bellard
                return propagateFloatx80NaN( a, b STATUS_VAR );
4127 158142c2 bellard
            }
4128 158142c2 bellard
            return a;
4129 158142c2 bellard
        }
4130 158142c2 bellard
        zSig1 = 0;
4131 158142c2 bellard
        zSig0 = aSig + bSig;
4132 158142c2 bellard
        if ( aExp == 0 ) {
4133 158142c2 bellard
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4134 158142c2 bellard
            goto roundAndPack;
4135 158142c2 bellard
        }
4136 158142c2 bellard
        zExp = aExp;
4137 158142c2 bellard
        goto shiftRight1;
4138 158142c2 bellard
    }
4139 158142c2 bellard
    zSig0 = aSig + bSig;
4140 bb98fe42 Andreas Färber
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
4141 158142c2 bellard
 shiftRight1:
4142 158142c2 bellard
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4143 158142c2 bellard
    zSig0 |= LIT64( 0x8000000000000000 );
4144 158142c2 bellard
    ++zExp;
4145 158142c2 bellard
 roundAndPack:
4146 158142c2 bellard
    return
4147 158142c2 bellard
        roundAndPackFloatx80(
4148 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4149 158142c2 bellard
4150 158142c2 bellard
}
4151 158142c2 bellard
4152 158142c2 bellard
/*----------------------------------------------------------------------------
4153 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
4154 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
4155 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4156 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4157 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4158 158142c2 bellard
*----------------------------------------------------------------------------*/
4159 158142c2 bellard
4160 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
{
    uint64_t sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    uint64_t sigB = extractFloatx80Frac( b );
    int32 expB = extractFloatx80Exp( b );
    int32 shift = expA - expB;
    uint64_t guard = 0;         /* low 64 bits of the aligned (smaller) operand */
    uint64_t diffHi, diffLo;
    int32 resultExp;
    flag bIsLarger;

    if ( shift == 0 ) {
        /* Same exponent field: no alignment shift is needed. */
        if ( expA == 0x7FFF ) {
            floatx80 nan;

            if ( (uint64_t) ( ( sigA | sigB )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            /* Neither operand is a NaN: raise invalid, return the default NaN. */
            float_raise( float_flag_invalid STATUS_VAR);
            nan.low = floatx80_default_nan_low;
            nan.high = floatx80_default_nan_high;
            return nan;
        }
        if ( expA == 0 ) {
            /* Both exponent fields are zero; use exponent 1 for the result. */
            expA = 1;
            expB = 1;
        }
        if ( sigA == sigB ) {
            /* Exact cancellation: the zero is negative only when rounding down. */
            return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
        }
        bIsLarger = ( sigA < sigB );
    }
    else if ( shift < 0 ) {
        /* `b' has the larger exponent. */
        if ( expB == 0x7FFF ) {
            if ( (uint64_t) ( sigB<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( expA == 0 ) {
            ++shift;
        }
        shift128RightJamming( sigA, 0, - shift, &sigA, &guard );
        bIsLarger = 1;
    }
    else {
        /* `a' has the larger exponent. */
        if ( expA == 0x7FFF ) {
            if ( (uint64_t) ( sigA<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        if ( expB == 0 ) {
            --shift;
        }
        shift128RightJamming( sigB, 0, shift, &sigB, &guard );
        bIsLarger = 0;
    }

    if ( bIsLarger ) {
        /* Result is b - a, so the sign supplied by the caller is flipped. */
        sub128( sigB, 0, sigA, guard, &diffHi, &diffLo );
        resultExp = expB;
        zSign ^= 1;
    }
    else {
        sub128( sigA, 0, sigB, guard, &diffHi, &diffLo );
        resultExp = expA;
    }
    return
        normalizeRoundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, resultExp, diffHi, diffLo
            STATUS_VAR );

}
4219 158142c2 bellard
4220 158142c2 bellard
/*----------------------------------------------------------------------------
4221 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
4222 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4223 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4224 158142c2 bellard
*----------------------------------------------------------------------------*/
4225 158142c2 bellard
4226 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
{
    const flag signOfA = extractFloatx80Sign( a );

    /* Unlike signs: hand the operands to the magnitude-subtraction helper. */
    if ( signOfA != extractFloatx80Sign( b ) ) {
        return subFloatx80Sigs( a, b, signOfA STATUS_VAR );
    }
    /* Like signs: hand them to the magnitude-addition helper. */
    return addFloatx80Sigs( a, b, signOfA STATUS_VAR );

}
4240 158142c2 bellard
4241 158142c2 bellard
/*----------------------------------------------------------------------------
4242 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
4243 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4244 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4245 158142c2 bellard
*----------------------------------------------------------------------------*/
4246 158142c2 bellard
4247 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
{
    const flag signOfA = extractFloatx80Sign( a );

    /* Unlike signs: hand the operands to the magnitude-addition helper. */
    if ( signOfA != extractFloatx80Sign( b ) ) {
        return addFloatx80Sigs( a, b, signOfA STATUS_VAR );
    }
    /* Like signs: hand them to the magnitude-subtraction helper. */
    return subFloatx80Sigs( a, b, signOfA STATUS_VAR );

}
4261 158142c2 bellard
4262 158142c2 bellard
/*----------------------------------------------------------------------------
4263 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
4264 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4265 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4266 158142c2 bellard
*----------------------------------------------------------------------------*/
4267 158142c2 bellard
4268 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
{
    uint64_t sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    uint64_t sigB = extractFloatx80Frac( b );
    int32 expB = extractFloatx80Exp( b );
    flag productSign = extractFloatx80Sign( a ) ^ extractFloatx80Sign( b );
    uint64_t prodHi, prodLo;
    int32 productExp;

    /* At least one operand has the maximum exponent field. */
    if ( ( expA == 0x7FFF ) || ( expB == 0x7FFF ) ) {
        flag aIsNaN = ( expA == 0x7FFF ) && ( (uint64_t) ( sigA<<1 ) != 0 );
        flag bIsNaN = ( expB == 0x7FFF ) && ( (uint64_t) ( sigB<<1 ) != 0 );
        flag otherIsZero;

        if ( aIsNaN || bIsNaN ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        /* Check whether the opposite operand is an all-zero encoding. */
        if ( expA == 0x7FFF ) {
            otherIsZero = ( ( expB | sigB ) == 0 );
        }
        else {
            otherIsZero = ( ( expA | sigA ) == 0 );
        }
        if ( otherIsZero ) {
            floatx80 nan;

            float_raise( float_flag_invalid STATUS_VAR);
            nan.low = floatx80_default_nan_low;
            nan.high = floatx80_default_nan_high;
            return nan;
        }
        return packFloatx80( productSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    /* Zero operands give a signed zero; other zero-exponent inputs are normalized. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return packFloatx80( productSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( sigA, &expA, &sigA );
    }
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            return packFloatx80( productSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( sigB, &expB, &sigB );
    }

    productExp = expA + expB - 0x3FFE;
    mul64To128( sigA, sigB, &prodHi, &prodLo );
    /* If the high word is positive as a signed value, shift left once. */
    if ( (int64_t) prodHi > 0 ) {
        shortShift128Left( prodHi, prodLo, 1, &prodHi, &prodLo );
        --productExp;
    }
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), productSign, productExp, prodHi, prodLo
            STATUS_VAR );

}
4320 158142c2 bellard
4321 158142c2 bellard
/*----------------------------------------------------------------------------
4322 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
4323 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
4324 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4325 158142c2 bellard
*----------------------------------------------------------------------------*/
4326 158142c2 bellard
4327 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4328 158142c2 bellard
{
    /* Computes a / b.  Special operands (NaN, infinity, zero) are handled
     * first; the general case builds a 128-bit quotient (zSig0:zSig1) from
     * two estimate-and-correct steps and hands it to roundAndPackFloatx80. */
4329 158142c2 bellard
    flag aSign, bSign, zSign;
4330 158142c2 bellard
    int32 aExp, bExp, zExp;
4331 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4332 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, term0, term1, term2;
4333 158142c2 bellard
    floatx80 z;
4334 158142c2 bellard
4335 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4336 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4337 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4338 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4339 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4340 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4341 158142c2 bellard
    /* The quotient's sign is the XOR of the operand signs. */
    zSign = aSign ^ bSign;
4342 158142c2 bellard
    /* `a' has the maximum exponent: it is a NaN or an infinity. */
    if ( aExp == 0x7FFF ) {
4343 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4344 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4345 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4346 158142c2 bellard
            /* infinity / infinity */
            goto invalid;
4347 158142c2 bellard
        }
4348 158142c2 bellard
        /* infinity / non-infinite value: signed infinity. */
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4349 158142c2 bellard
    }
4350 158142c2 bellard
    /* `b' has the maximum exponent: NaN propagates, otherwise the result is a signed zero. */
    if ( bExp == 0x7FFF ) {
4351 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4352 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
4353 158142c2 bellard
    }
4354 158142c2 bellard
    if ( bExp == 0 ) {
4355 158142c2 bellard
        if ( bSig == 0 ) {
4356 158142c2 bellard
            /* Divisor is zero: 0/0 is invalid, anything else is divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
4357 158142c2 bellard
 invalid:
4358 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4359 158142c2 bellard
                z.low = floatx80_default_nan_low;
4360 158142c2 bellard
                z.high = floatx80_default_nan_high;
4361 158142c2 bellard
                return z;
4362 158142c2 bellard
            }
4363 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4364 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4365 158142c2 bellard
        }
4366 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4367 158142c2 bellard
    }
4368 158142c2 bellard
    if ( aExp == 0 ) {
4369 158142c2 bellard
        /* Zero dividend with a nonzero, non-special divisor: signed zero. */
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4370 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4371 158142c2 bellard
    }
4372 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
4373 158142c2 bellard
    rem1 = 0;
4374 158142c2 bellard
    /* When the dividend significand is not smaller than the divisor's, halve
     * it (keeping the shifted-out bit in rem1) and bump the exponent to
     * compensate. */
    if ( bSig <= aSig ) {
4375 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4376 158142c2 bellard
        ++zExp;
4377 158142c2 bellard
    }
4378 158142c2 bellard
    /* High quotient word: estimate, then step down while the remainder
     * (dividend - bSig * zSig0) is negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4379 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4380 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4381 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4382 158142c2 bellard
        --zSig0;
4383 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4384 158142c2 bellard
    }
4385 158142c2 bellard
    /* Low quotient word, estimated from the remaining remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4386 bb98fe42 Andreas Färber
    /* Only when the estimate (ignoring its top bit) is very small is it
     * corrected exactly; a nonzero final remainder is then folded into the
     * least significant bit of zSig1. */
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4387 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4388 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4389 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4390 158142c2 bellard
            --zSig1;
4391 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4392 158142c2 bellard
        }
4393 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4394 158142c2 bellard
    }
4395 158142c2 bellard
    return
4396 158142c2 bellard
        roundAndPackFloatx80(
4397 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4398 158142c2 bellard
4399 158142c2 bellard
}
4400 158142c2 bellard
4401 158142c2 bellard
/*----------------------------------------------------------------------------
4402 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4403 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4404 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4405 158142c2 bellard
*----------------------------------------------------------------------------*/
4406 158142c2 bellard
4407 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign, zSign;
    int32 aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;
    floatx80 z;

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    /* `a' is a NaN or an infinity: NaNs propagate, infinity rem x is invalid. */
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        goto invalid;
    }
    /* `b' is a NaN (propagate) or an infinity (the remainder is `a' itself). */
    if ( bExp == 0x7FFF ) {
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero is invalid. */
 invalid:
            float_raise( float_flag_invalid STATUS_VAR);
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* Test the WHOLE significand for zero.  The previous test shifted out
         * the explicit integer bit, so a pseudo-denormal dividend (exponent 0,
         * significand 0x8000000000000000) was mistaken for zero and returned
         * unchanged even when the divisor was smaller than it. */
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    bSig |= LIT64( 0x8000000000000000 );
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* Exponent more than one below the divisor's: `a' is returned as is. */
        if ( expDiff < -1 ) return a;
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Reduce the remainder 62 quotient bits at a time while more than 64
     * bits of exponent difference remain. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        /* Back the estimate off by 2 (presumably so the partial remainder
         * cannot go negative -- confirm against estimateDiv128To64). */
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step: keep only the top `expDiff' quotient bits. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Subtract the scaled divisor until the remainder is below it. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    /* Compare the remainder with (scaled divisor - remainder) and keep the
     * smaller magnitude; on a tie the lowest quotient bit decides.  Choosing
     * the alternate flips the result's sign. */
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
              && ( q & 1 ) )
       ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
        zSign = ! zSign;
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );

}
4496 158142c2 bellard
4497 158142c2 bellard
/*----------------------------------------------------------------------------
4498 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4499 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4500 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4501 158142c2 bellard
*----------------------------------------------------------------------------*/
4502 158142c2 bellard
4503 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4504 158142c2 bellard
{
    /* Computes sqrt(a).  Special operands are handled first; the general
     * case refines a 32-bit estimate into a 128-bit root (zSig0:zSig1) that
     * is handed to roundAndPackFloatx80 with a positive sign. */
4505 158142c2 bellard
    flag aSign;
4506 158142c2 bellard
    int32 aExp, zExp;
4507 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4508 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4509 158142c2 bellard
    floatx80 z;
4510 158142c2 bellard
4511 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4512 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4513 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4514 158142c2 bellard
    /* Maximum exponent: NaN propagates, +infinity is returned unchanged,
     * -infinity is invalid. */
    if ( aExp == 0x7FFF ) {
4515 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4516 158142c2 bellard
        if ( ! aSign ) return a;
4517 158142c2 bellard
        goto invalid;
4518 158142c2 bellard
    }
4519 158142c2 bellard
    /* Negative operand: negative zero is returned unchanged, anything else
     * is invalid. */
    if ( aSign ) {
4520 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4521 158142c2 bellard
 invalid:
4522 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4523 158142c2 bellard
        z.low = floatx80_default_nan_low;
4524 158142c2 bellard
        z.high = floatx80_default_nan_high;
4525 158142c2 bellard
        return z;
4526 158142c2 bellard
    }
4527 158142c2 bellard
    if ( aExp == 0 ) {
4528 158142c2 bellard
        /* Positive zero yields positive zero. */
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4529 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4530 158142c2 bellard
    }
4531 158142c2 bellard
    /* Halve the unbiased exponent and re-apply the bias. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4532 158142c2 bellard
    /* Initial estimate from the upper 32 bits of the significand. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4533 158142c2 bellard
    /* Shift the operand right by 2 or 3 bits depending on exponent parity. */
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4534 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4535 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4536 158142c2 bellard
    /* Remainder = operand - zSig0^2; step zSig0 down while it is negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
4537 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4538 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4539 158142c2 bellard
        --zSig0;
4540 158142c2 bellard
        doubleZSig0 -= 2;
4541 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4542 158142c2 bellard
    }
4543 158142c2 bellard
    /* Low root word, estimated from the remainder and twice the high word. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4544 158142c2 bellard
    /* Only when the estimate's low 62 bits are very small is it corrected
     * exactly; a nonzero final remainder is then folded into the least
     * significant bit of zSig1. */
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4545 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4546 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4547 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4548 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4549 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4550 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4551 158142c2 bellard
            --zSig1;
4552 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4553 158142c2 bellard
            term3 |= 1;
4554 158142c2 bellard
            term2 |= doubleZSig0;
4555 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4556 158142c2 bellard
        }
4557 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4558 158142c2 bellard
    }
4559 158142c2 bellard
    /* Assemble the 128-bit result: zSig1 doubled, with doubleZSig0 OR-ed
     * into the high word. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4560 158142c2 bellard
    zSig0 |= doubleZSig0;
4561 158142c2 bellard
    return
4562 158142c2 bellard
        roundAndPackFloatx80(
4563 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4564 158142c2 bellard
4565 158142c2 bellard
}
4566 158142c2 bellard
4567 158142c2 bellard
/*----------------------------------------------------------------------------
4568 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is equal
4569 b689362d Aurelien Jarno
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4570 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4571 b689362d Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4572 158142c2 bellard
*----------------------------------------------------------------------------*/
4573 158142c2 bellard
4574 b689362d Aurelien Jarno
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
{
    /* A NaN has the maximum exponent and a nonzero fraction below the top bit. */
    int aIsNaN =    ( extractFloatx80Exp( a ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) != 0 );
    int bIsNaN =    ( extractFloatx80Exp( b ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( b )<<1 ) != 0 );

    if ( aIsNaN || bIsNaN ) {
        /* Signaling comparison: any NaN operand raises invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( a.low != b.low ) {
        return 0;
    }
    if ( a.high == b.high ) {
        return 1;
    }
    /* Significands match but the high words differ: equal only for +0 vs -0. */
    return ( a.low == 0 ) && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 );

}
4593 158142c2 bellard
4594 158142c2 bellard
/*----------------------------------------------------------------------------
4595 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4596 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4597 f5a64251 Aurelien Jarno
| invalid exception is raised if either operand is a NaN.  The comparison is
4598 f5a64251 Aurelien Jarno
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4599 f5a64251 Aurelien Jarno
| Arithmetic.
4600 158142c2 bellard
*----------------------------------------------------------------------------*/
4601 158142c2 bellard
4602 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
{
    int aIsNaN =    ( extractFloatx80Exp( a ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) != 0 );
    int bIsNaN =    ( extractFloatx80Exp( b ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( b )<<1 ) != 0 );
    flag signOfA, signOfB;

    if ( aIsNaN || bIsNaN ) {
        /* Signaling comparison: any NaN operand raises invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    signOfA = extractFloatx80Sign( a );
    signOfB = extractFloatx80Sign( b );
    if ( signOfA != signOfB ) {
        uint64_t magnitudeBits;

        /* A negative `a' is <= a non-negative `b'. */
        if ( signOfA ) {
            return 1;
        }
        /* Otherwise true only when both are zeros (+0 <= -0). */
        magnitudeBits = ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low;
        return magnitudeBits == 0;
    }
    /* Same sign: compare encodings, with the operands swapped for negatives. */
    if ( signOfA ) {
        return le128( b.high, b.low, a.high, a.low );
    }
    return le128( a.high, a.low, b.high, b.low );

}
4627 158142c2 bellard
4628 158142c2 bellard
/*----------------------------------------------------------------------------
4629 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4630 f5a64251 Aurelien Jarno
| less than the corresponding value `b', and 0 otherwise.  The invalid
4631 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
4632 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4633 158142c2 bellard
*----------------------------------------------------------------------------*/
4634 158142c2 bellard
4635 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
{
    int aIsNaN =    ( extractFloatx80Exp( a ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) != 0 );
    int bIsNaN =    ( extractFloatx80Exp( b ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( b )<<1 ) != 0 );
    flag signOfA, signOfB;

    if ( aIsNaN || bIsNaN ) {
        /* Signaling comparison: any NaN operand raises invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    signOfA = extractFloatx80Sign( a );
    signOfB = extractFloatx80Sign( b );
    if ( signOfA != signOfB ) {
        uint64_t magnitudeBits;

        /* A non-negative `a' is never below a negative `b'. */
        if ( ! signOfA ) {
            return 0;
        }
        /* Negative `a': strictly less unless both operands are zeros. */
        magnitudeBits = ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low;
        return magnitudeBits != 0;
    }
    /* Same sign: compare encodings, with the operands swapped for negatives. */
    if ( signOfA ) {
        return lt128( b.high, b.low, a.high, a.low );
    }
    return lt128( a.high, a.low, b.high, b.low );

}
4660 158142c2 bellard
4661 158142c2 bellard
/*----------------------------------------------------------------------------
4662 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4663 f5a64251 Aurelien Jarno
| cannot be compared, and 0 otherwise.  The invalid exception is raised if
4664 f5a64251 Aurelien Jarno
| either operand is a NaN.   The comparison is performed according to the
4665 f5a64251 Aurelien Jarno
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4666 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4667 67b7861d Aurelien Jarno
int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
{
    /* A NaN has the maximum exponent and a nonzero fraction below the top bit. */
    int aIsNaN =    ( extractFloatx80Exp( a ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) != 0 );
    int bIsNaN =    ( extractFloatx80Exp( b ) == 0x7FFF )
                 && ( (uint64_t) ( extractFloatx80Frac( b )<<1 ) != 0 );

    if ( ! ( aIsNaN || bIsNaN ) ) {
        return 0;
    }
    /* Signaling variant: any NaN operand raises invalid. */
    float_raise( float_flag_invalid STATUS_VAR);
    return 1;
}
4679 67b7861d Aurelien Jarno
4680 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4681 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is
4682 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
4683 f5a64251 Aurelien Jarno
| cause an exception.  The comparison is performed according to the IEC/IEEE
4684 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
4685 158142c2 bellard
*----------------------------------------------------------------------------*/
4686 158142c2 bellard
4687 b689362d Aurelien Jarno
int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4688 158142c2 bellard
{
4689 158142c2 bellard
4690 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4691 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4692 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4693 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4694 158142c2 bellard
       ) {
4695 b689362d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4696 b689362d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4697 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4698 b689362d Aurelien Jarno
        }
4699 158142c2 bellard
        return 0;
4700 158142c2 bellard
    }
4701 158142c2 bellard
    return
4702 158142c2 bellard
           ( a.low == b.low )
4703 158142c2 bellard
        && (    ( a.high == b.high )
4704 158142c2 bellard
             || (    ( a.low == 0 )
4705 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
4706 158142c2 bellard
           );
4707 158142c2 bellard
4708 158142c2 bellard
}
4709 158142c2 bellard
4710 158142c2 bellard
/*----------------------------------------------------------------------------
4711 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4712 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4713 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4714 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4715 158142c2 bellard
*----------------------------------------------------------------------------*/
4716 158142c2 bellard
4717 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4718 158142c2 bellard
{
4719 158142c2 bellard
    flag aSign, bSign;
4720 158142c2 bellard
4721 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4722 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4723 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4724 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4725 158142c2 bellard
       ) {
4726 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4727 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4728 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4729 158142c2 bellard
        }
4730 158142c2 bellard
        return 0;
4731 158142c2 bellard
    }
4732 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4733 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4734 158142c2 bellard
    if ( aSign != bSign ) {
4735 158142c2 bellard
        return
4736 158142c2 bellard
               aSign
4737 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4738 158142c2 bellard
                 == 0 );
4739 158142c2 bellard
    }
4740 158142c2 bellard
    return
4741 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4742 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4743 158142c2 bellard
4744 158142c2 bellard
}
4745 158142c2 bellard
4746 158142c2 bellard
/*----------------------------------------------------------------------------
4747 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4748 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4749 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4750 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4751 158142c2 bellard
*----------------------------------------------------------------------------*/
4752 158142c2 bellard
4753 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4754 158142c2 bellard
{
4755 158142c2 bellard
    flag aSign, bSign;
4756 158142c2 bellard
4757 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4758 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4759 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4760 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4761 158142c2 bellard
       ) {
4762 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4763 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4764 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4765 158142c2 bellard
        }
4766 158142c2 bellard
        return 0;
4767 158142c2 bellard
    }
4768 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4769 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4770 158142c2 bellard
    if ( aSign != bSign ) {
4771 158142c2 bellard
        return
4772 158142c2 bellard
               aSign
4773 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4774 158142c2 bellard
                 != 0 );
4775 158142c2 bellard
    }
4776 158142c2 bellard
    return
4777 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4778 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4779 158142c2 bellard
4780 158142c2 bellard
}
4781 158142c2 bellard
4782 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4783 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4784 67b7861d Aurelien Jarno
| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
4785 67b7861d Aurelien Jarno
| The comparison is performed according to the IEC/IEEE Standard for Binary
4786 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
4787 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4788 67b7861d Aurelien Jarno
int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4789 67b7861d Aurelien Jarno
{
4790 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4791 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4792 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4793 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4794 67b7861d Aurelien Jarno
       ) {
4795 67b7861d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4796 67b7861d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4797 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4798 67b7861d Aurelien Jarno
        }
4799 67b7861d Aurelien Jarno
        return 1;
4800 67b7861d Aurelien Jarno
    }
4801 67b7861d Aurelien Jarno
    return 0;
4802 67b7861d Aurelien Jarno
}
4803 67b7861d Aurelien Jarno
4804 158142c2 bellard
/*----------------------------------------------------------------------------
4805 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4806 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4807 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4808 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4809 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4810 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4811 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4812 158142c2 bellard
*----------------------------------------------------------------------------*/
4813 158142c2 bellard
4814 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4815 158142c2 bellard
{
4816 158142c2 bellard
    flag aSign;
4817 158142c2 bellard
    int32 aExp, shiftCount;
4818 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4819 158142c2 bellard
4820 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4821 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4822 158142c2 bellard
    aExp = extractFloat128Exp( a );
4823 158142c2 bellard
    aSign = extractFloat128Sign( a );
4824 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4825 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4826 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4827 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4828 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4829 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4830 158142c2 bellard
4831 158142c2 bellard
}
4832 158142c2 bellard
4833 158142c2 bellard
/*----------------------------------------------------------------------------
4834 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4835 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4836 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4837 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4838 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4839 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4840 158142c2 bellard
| returned.
4841 158142c2 bellard
*----------------------------------------------------------------------------*/
4842 158142c2 bellard
4843 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4844 158142c2 bellard
{
4845 158142c2 bellard
    flag aSign;
4846 158142c2 bellard
    int32 aExp, shiftCount;
4847 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, savedASig;
4848 158142c2 bellard
    int32 z;
4849 158142c2 bellard
4850 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4851 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4852 158142c2 bellard
    aExp = extractFloat128Exp( a );
4853 158142c2 bellard
    aSign = extractFloat128Sign( a );
4854 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4855 158142c2 bellard
    if ( 0x401E < aExp ) {
4856 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4857 158142c2 bellard
        goto invalid;
4858 158142c2 bellard
    }
4859 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4860 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4861 158142c2 bellard
        return 0;
4862 158142c2 bellard
    }
4863 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4864 158142c2 bellard
    shiftCount = 0x402F - aExp;
4865 158142c2 bellard
    savedASig = aSig0;
4866 158142c2 bellard
    aSig0 >>= shiftCount;
4867 158142c2 bellard
    z = aSig0;
4868 158142c2 bellard
    if ( aSign ) z = - z;
4869 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4870 158142c2 bellard
 invalid:
4871 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4872 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4873 158142c2 bellard
    }
4874 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4875 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4876 158142c2 bellard
    }
4877 158142c2 bellard
    return z;
4878 158142c2 bellard
4879 158142c2 bellard
}
4880 158142c2 bellard
4881 158142c2 bellard
/*----------------------------------------------------------------------------
4882 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4883 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4884 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4885 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4886 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4887 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4888 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4889 158142c2 bellard
*----------------------------------------------------------------------------*/
4890 158142c2 bellard
4891 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4892 158142c2 bellard
{
4893 158142c2 bellard
    flag aSign;
4894 158142c2 bellard
    int32 aExp, shiftCount;
4895 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4896 158142c2 bellard
4897 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4898 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4899 158142c2 bellard
    aExp = extractFloat128Exp( a );
4900 158142c2 bellard
    aSign = extractFloat128Sign( a );
4901 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4902 158142c2 bellard
    shiftCount = 0x402F - aExp;
4903 158142c2 bellard
    if ( shiftCount <= 0 ) {
4904 158142c2 bellard
        if ( 0x403E < aExp ) {
4905 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4906 158142c2 bellard
            if (    ! aSign
4907 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4908 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4909 158142c2 bellard
                    )
4910 158142c2 bellard
               ) {
4911 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4912 158142c2 bellard
            }
4913 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
4914 158142c2 bellard
        }
4915 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4916 158142c2 bellard
    }
4917 158142c2 bellard
    else {
4918 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4919 158142c2 bellard
    }
4920 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4921 158142c2 bellard
4922 158142c2 bellard
}
4923 158142c2 bellard
4924 158142c2 bellard
/*----------------------------------------------------------------------------
4925 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4926 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4927 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4928 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4929 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4930 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4931 158142c2 bellard
| returned.
4932 158142c2 bellard
*----------------------------------------------------------------------------*/
4933 158142c2 bellard
4934 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4935 158142c2 bellard
{
4936 158142c2 bellard
    flag aSign;
4937 158142c2 bellard
    int32 aExp, shiftCount;
4938 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4939 158142c2 bellard
    int64 z;
4940 158142c2 bellard
4941 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4942 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4943 158142c2 bellard
    aExp = extractFloat128Exp( a );
4944 158142c2 bellard
    aSign = extractFloat128Sign( a );
4945 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4946 158142c2 bellard
    shiftCount = aExp - 0x402F;
4947 158142c2 bellard
    if ( 0 < shiftCount ) {
4948 158142c2 bellard
        if ( 0x403E <= aExp ) {
4949 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4950 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4951 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4952 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4953 158142c2 bellard
            }
4954 158142c2 bellard
            else {
4955 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4956 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4957 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4958 158142c2 bellard
                }
4959 158142c2 bellard
            }
4960 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
4961 158142c2 bellard
        }
4962 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4963 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
4964 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4965 158142c2 bellard
        }
4966 158142c2 bellard
    }
4967 158142c2 bellard
    else {
4968 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4969 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4970 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4971 158142c2 bellard
            }
4972 158142c2 bellard
            return 0;
4973 158142c2 bellard
        }
4974 158142c2 bellard
        z = aSig0>>( - shiftCount );
4975 158142c2 bellard
        if (    aSig1
4976 bb98fe42 Andreas Färber
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4977 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4978 158142c2 bellard
        }
4979 158142c2 bellard
    }
4980 158142c2 bellard
    if ( aSign ) z = - z;
4981 158142c2 bellard
    return z;
4982 158142c2 bellard
4983 158142c2 bellard
}
4984 158142c2 bellard
4985 158142c2 bellard
/*----------------------------------------------------------------------------
4986 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4987 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4988 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4989 158142c2 bellard
| Arithmetic.
4990 158142c2 bellard
*----------------------------------------------------------------------------*/
4991 158142c2 bellard
4992 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4993 158142c2 bellard
{
4994 158142c2 bellard
    flag aSign;
4995 158142c2 bellard
    int32 aExp;
4996 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4997 bb98fe42 Andreas Färber
    uint32_t zSig;
4998 158142c2 bellard
4999 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5000 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5001 158142c2 bellard
    aExp = extractFloat128Exp( a );
5002 158142c2 bellard
    aSign = extractFloat128Sign( a );
5003 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5004 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5005 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5006 158142c2 bellard
        }
5007 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
5008 158142c2 bellard
    }
5009 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5010 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
5011 158142c2 bellard
    zSig = aSig0;
5012 158142c2 bellard
    if ( aExp || zSig ) {
5013 158142c2 bellard
        zSig |= 0x40000000;
5014 158142c2 bellard
        aExp -= 0x3F81;
5015 158142c2 bellard
    }
5016 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5017 158142c2 bellard
5018 158142c2 bellard
}
5019 158142c2 bellard
5020 158142c2 bellard
/*----------------------------------------------------------------------------
5021 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5022 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
5023 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5024 158142c2 bellard
| Arithmetic.
5025 158142c2 bellard
*----------------------------------------------------------------------------*/
5026 158142c2 bellard
5027 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
5028 158142c2 bellard
{
5029 158142c2 bellard
    flag aSign;
5030 158142c2 bellard
    int32 aExp;
5031 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5032 158142c2 bellard
5033 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5034 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5035 158142c2 bellard
    aExp = extractFloat128Exp( a );
5036 158142c2 bellard
    aSign = extractFloat128Sign( a );
5037 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5038 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5039 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5040 158142c2 bellard
        }
5041 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
5042 158142c2 bellard
    }
5043 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5044 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5045 158142c2 bellard
    if ( aExp || aSig0 ) {
5046 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5047 158142c2 bellard
        aExp -= 0x3C01;
5048 158142c2 bellard
    }
5049 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5050 158142c2 bellard
5051 158142c2 bellard
}
5052 158142c2 bellard
5053 158142c2 bellard
/*----------------------------------------------------------------------------
5054 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5055 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
5056 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
5057 158142c2 bellard
| Floating-Point Arithmetic.
5058 158142c2 bellard
*----------------------------------------------------------------------------*/
5059 158142c2 bellard
5060 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5061 158142c2 bellard
{
5062 158142c2 bellard
    flag aSign;
5063 158142c2 bellard
    int32 aExp;
5064 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5065 158142c2 bellard
5066 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5067 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5068 158142c2 bellard
    aExp = extractFloat128Exp( a );
5069 158142c2 bellard
    aSign = extractFloat128Sign( a );
5070 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5071 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5072 bcd4d9af Christophe Lyon
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5073 158142c2 bellard
        }
5074 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5075 158142c2 bellard
    }
5076 158142c2 bellard
    if ( aExp == 0 ) {
5077 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5078 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5079 158142c2 bellard
    }
5080 158142c2 bellard
    else {
5081 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
5082 158142c2 bellard
    }
5083 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5084 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5085 158142c2 bellard
5086 158142c2 bellard
}
5087 158142c2 bellard
5088 158142c2 bellard
/*----------------------------------------------------------------------------
5089 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
5090 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
5091 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
5092 158142c2 bellard
| Floating-Point Arithmetic.
5093 158142c2 bellard
*----------------------------------------------------------------------------*/
5094 158142c2 bellard
5095 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
5096 158142c2 bellard
{
5097 158142c2 bellard
    flag aSign;
5098 158142c2 bellard
    int32 aExp;
5099 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
5100 158142c2 bellard
    int8 roundingMode;
5101 158142c2 bellard
    float128 z;
5102 158142c2 bellard
5103 158142c2 bellard
    aExp = extractFloat128Exp( a );
5104 158142c2 bellard
    if ( 0x402F <= aExp ) {
5105 158142c2 bellard
        if ( 0x406F <= aExp ) {
5106 158142c2 bellard
            if (    ( aExp == 0x7FFF )
5107 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5108 158142c2 bellard
               ) {
5109 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
5110 158142c2 bellard
            }
5111 158142c2 bellard
            return a;
5112 158142c2 bellard
        }
5113 158142c2 bellard
        lastBitMask = 1;
5114 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5115 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5116 158142c2 bellard
        z = a;
5117 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5118 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5119 158142c2 bellard
            if ( lastBitMask ) {
5120 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5121 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5122 158142c2 bellard
            }
5123 158142c2 bellard
            else {
5124 bb98fe42 Andreas Färber
                if ( (int64_t) z.low < 0 ) {
5125 158142c2 bellard
                    ++z.high;
5126 bb98fe42 Andreas Färber
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5127 158142c2 bellard
                }
5128 158142c2 bellard
            }
5129 158142c2 bellard
        }
5130 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5131 158142c2 bellard
            if (   extractFloat128Sign( z )
5132 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5133 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5134 158142c2 bellard
            }
5135 158142c2 bellard
        }
5136 158142c2 bellard
        z.low &= ~ roundBitsMask;
5137 158142c2 bellard
    }
5138 158142c2 bellard
    else {
5139 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5140 bb98fe42 Andreas Färber
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
5141 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5142 158142c2 bellard
            aSign = extractFloat128Sign( a );
5143 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
5144 158142c2 bellard
             case float_round_nearest_even:
5145 158142c2 bellard
                if (    ( aExp == 0x3FFE )
5146 158142c2 bellard
                     && (   extractFloat128Frac0( a )
5147 158142c2 bellard
                          | extractFloat128Frac1( a ) )
5148 158142c2 bellard
                   ) {
5149 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
5150 158142c2 bellard
                }
5151 158142c2 bellard
                break;
5152 158142c2 bellard
             case float_round_down:
5153 158142c2 bellard
                return
5154 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5155 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
5156 158142c2 bellard
             case float_round_up:
5157 158142c2 bellard
                return
5158 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
5159 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
5160 158142c2 bellard
            }
5161 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
5162 158142c2 bellard
        }
5163 158142c2 bellard
        lastBitMask = 1;
5164 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
5165 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5166 158142c2 bellard
        z.low = 0;
5167 158142c2 bellard
        z.high = a.high;
5168 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5169 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5170 158142c2 bellard
            z.high += lastBitMask>>1;
5171 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5172 158142c2 bellard
                z.high &= ~ lastBitMask;
5173 158142c2 bellard
            }
5174 158142c2 bellard
        }
5175 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5176 158142c2 bellard
            if (   extractFloat128Sign( z )
5177 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5178 158142c2 bellard
                z.high |= ( a.low != 0 );
5179 158142c2 bellard
                z.high += roundBitsMask;
5180 158142c2 bellard
            }
5181 158142c2 bellard
        }
5182 158142c2 bellard
        z.high &= ~ roundBitsMask;
5183 158142c2 bellard
    }
5184 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5185 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5186 158142c2 bellard
    }
5187 158142c2 bellard
    return z;
5188 158142c2 bellard
5189 158142c2 bellard
}
5190 158142c2 bellard
5191 158142c2 bellard
/*----------------------------------------------------------------------------
5192 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
5193 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
5194 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
5195 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
5196 158142c2 bellard
| Floating-Point Arithmetic.
5197 158142c2 bellard
*----------------------------------------------------------------------------*/
5198 158142c2 bellard
5199 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5200 158142c2 bellard
{
5201 158142c2 bellard
    int32 aExp, bExp, zExp;
5202 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5203 158142c2 bellard
    int32 expDiff;
5204 158142c2 bellard
5205 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5206 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5207 158142c2 bellard
    aExp = extractFloat128Exp( a );
5208 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5209 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5210 158142c2 bellard
    bExp = extractFloat128Exp( b );
5211 158142c2 bellard
    expDiff = aExp - bExp;
5212 158142c2 bellard
    if ( 0 < expDiff ) {
5213 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5214 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5215 158142c2 bellard
            return a;
5216 158142c2 bellard
        }
5217 158142c2 bellard
        if ( bExp == 0 ) {
5218 158142c2 bellard
            --expDiff;
5219 158142c2 bellard
        }
5220 158142c2 bellard
        else {
5221 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
5222 158142c2 bellard
        }
5223 158142c2 bellard
        shift128ExtraRightJamming(
5224 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5225 158142c2 bellard
        zExp = aExp;
5226 158142c2 bellard
    }
5227 158142c2 bellard
    else if ( expDiff < 0 ) {
5228 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5229 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5230 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5231 158142c2 bellard
        }
5232 158142c2 bellard
        if ( aExp == 0 ) {
5233 158142c2 bellard
            ++expDiff;
5234 158142c2 bellard
        }
5235 158142c2 bellard
        else {
5236 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
5237 158142c2 bellard
        }
5238 158142c2 bellard
        shift128ExtraRightJamming(
5239 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5240 158142c2 bellard
        zExp = bExp;
5241 158142c2 bellard
    }
5242 158142c2 bellard
    else {
5243 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5244 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5245 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
5246 158142c2 bellard
            }
5247 158142c2 bellard
            return a;
5248 158142c2 bellard
        }
5249 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5250 fe76d976 pbrook
        if ( aExp == 0 ) {
5251 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
5252 e6afc87f Peter Maydell
                if (zSig0 | zSig1) {
5253 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
5254 e6afc87f Peter Maydell
                }
5255 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
5256 e6afc87f Peter Maydell
            }
5257 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
5258 fe76d976 pbrook
        }
5259 158142c2 bellard
        zSig2 = 0;
5260 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
5261 158142c2 bellard
        zExp = aExp;
5262 158142c2 bellard
        goto shiftRight1;
5263 158142c2 bellard
    }
5264 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5265 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5266 158142c2 bellard
    --zExp;
5267 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5268 158142c2 bellard
    ++zExp;
5269 158142c2 bellard
 shiftRight1:
5270 158142c2 bellard
    shift128ExtraRightJamming(
5271 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5272 158142c2 bellard
 roundAndPack:
5273 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5274 158142c2 bellard
5275 158142c2 bellard
}
5276 158142c2 bellard
5277 158142c2 bellard
/*----------------------------------------------------------------------------
5278 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
5279 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
5280 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
5281 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5282 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5283 158142c2 bellard
*----------------------------------------------------------------------------*/
5284 158142c2 bellard
5285 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5286 158142c2 bellard
{
5287 158142c2 bellard
    int32 aExp, bExp, zExp;
5288 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5289 158142c2 bellard
    int32 expDiff;
5290 158142c2 bellard
    float128 z;
5291 158142c2 bellard
5292 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5293 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5294 158142c2 bellard
    aExp = extractFloat128Exp( a );
5295 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5296 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5297 158142c2 bellard
    bExp = extractFloat128Exp( b );
5298 158142c2 bellard
    expDiff = aExp - bExp;
5299 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5300 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5301 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
5302 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
5303 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5304 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5305 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5306 158142c2 bellard
        }
5307 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5308 158142c2 bellard
        z.low = float128_default_nan_low;
5309 158142c2 bellard
        z.high = float128_default_nan_high;
5310 158142c2 bellard
        return z;
5311 158142c2 bellard
    }
5312 158142c2 bellard
    if ( aExp == 0 ) {
5313 158142c2 bellard
        aExp = 1;
5314 158142c2 bellard
        bExp = 1;
5315 158142c2 bellard
    }
5316 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
5317 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
5318 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
5319 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
5320 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5321 158142c2 bellard
 bExpBigger:
5322 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5323 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5324 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5325 158142c2 bellard
    }
5326 158142c2 bellard
    if ( aExp == 0 ) {
5327 158142c2 bellard
        ++expDiff;
5328 158142c2 bellard
    }
5329 158142c2 bellard
    else {
5330 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5331 158142c2 bellard
    }
5332 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5333 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
5334 158142c2 bellard
 bBigger:
5335 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5336 158142c2 bellard
    zExp = bExp;
5337 158142c2 bellard
    zSign ^= 1;
5338 158142c2 bellard
    goto normalizeRoundAndPack;
5339 158142c2 bellard
 aExpBigger:
5340 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5341 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5342 158142c2 bellard
        return a;
5343 158142c2 bellard
    }
5344 158142c2 bellard
    if ( bExp == 0 ) {
5345 158142c2 bellard
        --expDiff;
5346 158142c2 bellard
    }
5347 158142c2 bellard
    else {
5348 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
5349 158142c2 bellard
    }
5350 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5351 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
5352 158142c2 bellard
 aBigger:
5353 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5354 158142c2 bellard
    zExp = aExp;
5355 158142c2 bellard
 normalizeRoundAndPack:
5356 158142c2 bellard
    --zExp;
5357 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5358 158142c2 bellard
5359 158142c2 bellard
}
5360 158142c2 bellard
5361 158142c2 bellard
/*----------------------------------------------------------------------------
5362 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
5363 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
5364 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5365 158142c2 bellard
*----------------------------------------------------------------------------*/
5366 158142c2 bellard
5367 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
5368 158142c2 bellard
{
5369 158142c2 bellard
    flag aSign, bSign;
5370 158142c2 bellard
5371 158142c2 bellard
    aSign = extractFloat128Sign( a );
5372 158142c2 bellard
    bSign = extractFloat128Sign( b );
5373 158142c2 bellard
    if ( aSign == bSign ) {
5374 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5375 158142c2 bellard
    }
5376 158142c2 bellard
    else {
5377 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5378 158142c2 bellard
    }
5379 158142c2 bellard
5380 158142c2 bellard
}
5381 158142c2 bellard
5382 158142c2 bellard
/*----------------------------------------------------------------------------
5383 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
5384 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5385 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5386 158142c2 bellard
*----------------------------------------------------------------------------*/
5387 158142c2 bellard
5388 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5389 158142c2 bellard
{
5390 158142c2 bellard
    flag aSign, bSign;
5391 158142c2 bellard
5392 158142c2 bellard
    aSign = extractFloat128Sign( a );
5393 158142c2 bellard
    bSign = extractFloat128Sign( b );
5394 158142c2 bellard
    if ( aSign == bSign ) {
5395 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5396 158142c2 bellard
    }
5397 158142c2 bellard
    else {
5398 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5399 158142c2 bellard
    }
5400 158142c2 bellard
5401 158142c2 bellard
}
5402 158142c2 bellard
5403 158142c2 bellard
/*----------------------------------------------------------------------------
5404 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
5405 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5406 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5407 158142c2 bellard
*----------------------------------------------------------------------------*/
5408 158142c2 bellard
5409 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5410 158142c2 bellard
{
5411 158142c2 bellard
    flag aSign, bSign, zSign;
5412 158142c2 bellard
    int32 aExp, bExp, zExp;
5413 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5414 158142c2 bellard
    float128 z;
5415 158142c2 bellard
5416 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5417 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5418 158142c2 bellard
    aExp = extractFloat128Exp( a );
5419 158142c2 bellard
    aSign = extractFloat128Sign( a );
5420 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5421 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5422 158142c2 bellard
    bExp = extractFloat128Exp( b );
5423 158142c2 bellard
    bSign = extractFloat128Sign( b );
5424 158142c2 bellard
    zSign = aSign ^ bSign;
5425 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5426 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5427 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5428 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5429 158142c2 bellard
        }
5430 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5431 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5432 158142c2 bellard
    }
5433 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5434 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5435 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5436 158142c2 bellard
 invalid:
5437 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5438 158142c2 bellard
            z.low = float128_default_nan_low;
5439 158142c2 bellard
            z.high = float128_default_nan_high;
5440 158142c2 bellard
            return z;
5441 158142c2 bellard
        }
5442 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5443 158142c2 bellard
    }
5444 158142c2 bellard
    if ( aExp == 0 ) {
5445 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5446 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5447 158142c2 bellard
    }
5448 158142c2 bellard
    if ( bExp == 0 ) {
5449 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5450 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5451 158142c2 bellard
    }
5452 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
5453 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5454 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5455 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5456 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5457 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
5458 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5459 158142c2 bellard
        shift128ExtraRightJamming(
5460 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5461 158142c2 bellard
        ++zExp;
5462 158142c2 bellard
    }
5463 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5464 158142c2 bellard
5465 158142c2 bellard
}
5466 158142c2 bellard
5467 158142c2 bellard
/*----------------------------------------------------------------------------
5468 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5469 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5470 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5471 158142c2 bellard
*----------------------------------------------------------------------------*/
5472 158142c2 bellard
5473 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5474 158142c2 bellard
{
5475 158142c2 bellard
    flag aSign, bSign, zSign;
5476 158142c2 bellard
    int32 aExp, bExp, zExp;
5477 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5478 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5479 158142c2 bellard
    float128 z;
5480 158142c2 bellard
5481 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5482 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5483 158142c2 bellard
    aExp = extractFloat128Exp( a );
5484 158142c2 bellard
    aSign = extractFloat128Sign( a );
5485 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5486 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5487 158142c2 bellard
    bExp = extractFloat128Exp( b );
5488 158142c2 bellard
    bSign = extractFloat128Sign( b );
5489 158142c2 bellard
    zSign = aSign ^ bSign;
5490 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5491 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5492 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5493 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5494 158142c2 bellard
            goto invalid;
5495 158142c2 bellard
        }
5496 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5497 158142c2 bellard
    }
5498 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5499 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5500 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5501 158142c2 bellard
    }
5502 158142c2 bellard
    if ( bExp == 0 ) {
5503 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5504 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5505 158142c2 bellard
 invalid:
5506 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5507 158142c2 bellard
                z.low = float128_default_nan_low;
5508 158142c2 bellard
                z.high = float128_default_nan_high;
5509 158142c2 bellard
                return z;
5510 158142c2 bellard
            }
5511 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
5512 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5513 158142c2 bellard
        }
5514 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5515 158142c2 bellard
    }
5516 158142c2 bellard
    if ( aExp == 0 ) {
5517 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5518 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5519 158142c2 bellard
    }
5520 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
5521 158142c2 bellard
    shortShift128Left(
5522 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5523 158142c2 bellard
    shortShift128Left(
5524 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5525 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5526 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5527 158142c2 bellard
        ++zExp;
5528 158142c2 bellard
    }
5529 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5530 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5531 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5532 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5533 158142c2 bellard
        --zSig0;
5534 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5535 158142c2 bellard
    }
5536 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5537 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5538 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5539 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5540 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5541 158142c2 bellard
            --zSig1;
5542 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5543 158142c2 bellard
        }
5544 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5545 158142c2 bellard
    }
5546 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5547 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5548 158142c2 bellard
5549 158142c2 bellard
}
5550 158142c2 bellard
5551 158142c2 bellard
/*----------------------------------------------------------------------------
5552 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5553 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5554 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5555 158142c2 bellard
*----------------------------------------------------------------------------*/
5556 158142c2 bellard
5557 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5558 158142c2 bellard
{
5559 ed086f3d Blue Swirl
    flag aSign, zSign;
5560 158142c2 bellard
    int32 aExp, bExp, expDiff;
5561 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5562 bb98fe42 Andreas Färber
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5563 bb98fe42 Andreas Färber
    int64_t sigMean0;
5564 158142c2 bellard
    float128 z;
5565 158142c2 bellard
5566 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5567 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5568 158142c2 bellard
    aExp = extractFloat128Exp( a );
5569 158142c2 bellard
    aSign = extractFloat128Sign( a );
5570 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5571 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5572 158142c2 bellard
    bExp = extractFloat128Exp( b );
5573 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5574 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5575 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5576 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5577 158142c2 bellard
        }
5578 158142c2 bellard
        goto invalid;
5579 158142c2 bellard
    }
5580 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5581 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5582 158142c2 bellard
        return a;
5583 158142c2 bellard
    }
5584 158142c2 bellard
    if ( bExp == 0 ) {
5585 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5586 158142c2 bellard
 invalid:
5587 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5588 158142c2 bellard
            z.low = float128_default_nan_low;
5589 158142c2 bellard
            z.high = float128_default_nan_high;
5590 158142c2 bellard
            return z;
5591 158142c2 bellard
        }
5592 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5593 158142c2 bellard
    }
5594 158142c2 bellard
    if ( aExp == 0 ) {
5595 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5596 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5597 158142c2 bellard
    }
5598 158142c2 bellard
    expDiff = aExp - bExp;
5599 158142c2 bellard
    if ( expDiff < -1 ) return a;
5600 158142c2 bellard
    shortShift128Left(
5601 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
5602 158142c2 bellard
        aSig1,
5603 158142c2 bellard
        15 - ( expDiff < 0 ),
5604 158142c2 bellard
        &aSig0,
5605 158142c2 bellard
        &aSig1
5606 158142c2 bellard
    );
5607 158142c2 bellard
    shortShift128Left(
5608 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5609 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
5610 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5611 158142c2 bellard
    expDiff -= 64;
5612 158142c2 bellard
    while ( 0 < expDiff ) {
5613 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5614 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5615 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5616 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5617 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5618 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5619 158142c2 bellard
        expDiff -= 61;
5620 158142c2 bellard
    }
5621 158142c2 bellard
    if ( -64 < expDiff ) {
5622 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5623 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5624 158142c2 bellard
        q >>= - expDiff;
5625 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5626 158142c2 bellard
        expDiff += 52;
5627 158142c2 bellard
        if ( expDiff < 0 ) {
5628 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5629 158142c2 bellard
        }
5630 158142c2 bellard
        else {
5631 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5632 158142c2 bellard
        }
5633 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5634 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5635 158142c2 bellard
    }
5636 158142c2 bellard
    else {
5637 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5638 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5639 158142c2 bellard
    }
5640 158142c2 bellard
    do {
5641 158142c2 bellard
        alternateASig0 = aSig0;
5642 158142c2 bellard
        alternateASig1 = aSig1;
5643 158142c2 bellard
        ++q;
5644 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5645 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig0 );
5646 158142c2 bellard
    add128(
5647 bb98fe42 Andreas Färber
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
5648 158142c2 bellard
    if (    ( sigMean0 < 0 )
5649 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5650 158142c2 bellard
        aSig0 = alternateASig0;
5651 158142c2 bellard
        aSig1 = alternateASig1;
5652 158142c2 bellard
    }
5653 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig0 < 0 );
5654 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5655 158142c2 bellard
    return
5656 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5657 158142c2 bellard
5658 158142c2 bellard
}
5659 158142c2 bellard
5660 158142c2 bellard
/*----------------------------------------------------------------------------
5661 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5662 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5663 158142c2 bellard
| Floating-Point Arithmetic.
5664 158142c2 bellard
*----------------------------------------------------------------------------*/
5665 158142c2 bellard
5666 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5667 158142c2 bellard
{
5668 158142c2 bellard
    flag aSign;
5669 158142c2 bellard
    int32 aExp, zExp;
5670 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5671 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5672 158142c2 bellard
    float128 z;
5673 158142c2 bellard
5674 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5675 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5676 158142c2 bellard
    aExp = extractFloat128Exp( a );
5677 158142c2 bellard
    aSign = extractFloat128Sign( a );
5678 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5679 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5680 158142c2 bellard
        if ( ! aSign ) return a;
5681 158142c2 bellard
        goto invalid;
5682 158142c2 bellard
    }
5683 158142c2 bellard
    if ( aSign ) {
5684 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5685 158142c2 bellard
 invalid:
5686 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5687 158142c2 bellard
        z.low = float128_default_nan_low;
5688 158142c2 bellard
        z.high = float128_default_nan_high;
5689 158142c2 bellard
        return z;
5690 158142c2 bellard
    }
5691 158142c2 bellard
    if ( aExp == 0 ) {
5692 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5693 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5694 158142c2 bellard
    }
5695 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5696 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5697 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5698 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5699 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5700 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5701 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5702 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5703 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5704 158142c2 bellard
        --zSig0;
5705 158142c2 bellard
        doubleZSig0 -= 2;
5706 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5707 158142c2 bellard
    }
5708 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5709 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5710 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5711 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5712 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5713 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5714 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5715 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5716 158142c2 bellard
            --zSig1;
5717 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5718 158142c2 bellard
            term3 |= 1;
5719 158142c2 bellard
            term2 |= doubleZSig0;
5720 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5721 158142c2 bellard
        }
5722 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5723 158142c2 bellard
    }
5724 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5725 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5726 158142c2 bellard
5727 158142c2 bellard
}
5728 158142c2 bellard
5729 158142c2 bellard
/*----------------------------------------------------------------------------
5730 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5731 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5732 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5733 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5734 158142c2 bellard
*----------------------------------------------------------------------------*/
5735 158142c2 bellard
5736 b689362d Aurelien Jarno
int float128_eq( float128 a, float128 b STATUS_PARAM )
5737 158142c2 bellard
{
5738 158142c2 bellard
5739 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5740 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5741 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5742 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5743 158142c2 bellard
       ) {
5744 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5745 158142c2 bellard
        return 0;
5746 158142c2 bellard
    }
5747 158142c2 bellard
    return
5748 158142c2 bellard
           ( a.low == b.low )
5749 158142c2 bellard
        && (    ( a.high == b.high )
5750 158142c2 bellard
             || (    ( a.low == 0 )
5751 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5752 158142c2 bellard
           );
5753 158142c2 bellard
5754 158142c2 bellard
}
5755 158142c2 bellard
5756 158142c2 bellard
/*----------------------------------------------------------------------------
5757 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5758 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
5759 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
5760 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5761 158142c2 bellard
*----------------------------------------------------------------------------*/
5762 158142c2 bellard
5763 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5764 158142c2 bellard
{
5765 158142c2 bellard
    flag aSign, bSign;
5766 158142c2 bellard
5767 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5768 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5769 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5770 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5771 158142c2 bellard
       ) {
5772 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5773 158142c2 bellard
        return 0;
5774 158142c2 bellard
    }
5775 158142c2 bellard
    aSign = extractFloat128Sign( a );
5776 158142c2 bellard
    bSign = extractFloat128Sign( b );
5777 158142c2 bellard
    if ( aSign != bSign ) {
5778 158142c2 bellard
        return
5779 158142c2 bellard
               aSign
5780 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5781 158142c2 bellard
                 == 0 );
5782 158142c2 bellard
    }
5783 158142c2 bellard
    return
5784 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5785 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5786 158142c2 bellard
5787 158142c2 bellard
}
5788 158142c2 bellard
5789 158142c2 bellard
/*----------------------------------------------------------------------------
5790 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5791 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5792 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
5793 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5794 158142c2 bellard
*----------------------------------------------------------------------------*/
5795 158142c2 bellard
5796 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5797 158142c2 bellard
{
5798 158142c2 bellard
    flag aSign, bSign;
5799 158142c2 bellard
5800 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5801 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5802 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5803 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5804 158142c2 bellard
       ) {
5805 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5806 158142c2 bellard
        return 0;
5807 158142c2 bellard
    }
5808 158142c2 bellard
    aSign = extractFloat128Sign( a );
5809 158142c2 bellard
    bSign = extractFloat128Sign( b );
5810 158142c2 bellard
    if ( aSign != bSign ) {
5811 158142c2 bellard
        return
5812 158142c2 bellard
               aSign
5813 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5814 158142c2 bellard
                 != 0 );
5815 158142c2 bellard
    }
5816 158142c2 bellard
    return
5817 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5818 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5819 158142c2 bellard
5820 158142c2 bellard
}
5821 158142c2 bellard
5822 158142c2 bellard
/*----------------------------------------------------------------------------
5823 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5824 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
5825 f5a64251 Aurelien Jarno
| operand is a NaN. The comparison is performed according to the IEC/IEEE
5826 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
5827 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5828 67b7861d Aurelien Jarno
5829 67b7861d Aurelien Jarno
int float128_unordered( float128 a, float128 b STATUS_PARAM )
5830 67b7861d Aurelien Jarno
{
5831 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5832 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5833 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5834 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5835 67b7861d Aurelien Jarno
       ) {
5836 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5837 67b7861d Aurelien Jarno
        return 1;
5838 67b7861d Aurelien Jarno
    }
5839 67b7861d Aurelien Jarno
    return 0;
5840 67b7861d Aurelien Jarno
}
5841 67b7861d Aurelien Jarno
5842 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5843 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5844 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5845 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
5846 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
5847 158142c2 bellard
*----------------------------------------------------------------------------*/
5848 158142c2 bellard
5849 b689362d Aurelien Jarno
int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
5850 158142c2 bellard
{
5851 158142c2 bellard
5852 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5853 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5854 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5855 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5856 158142c2 bellard
       ) {
5857 b689362d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
5858 b689362d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
5859 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5860 b689362d Aurelien Jarno
        }
5861 158142c2 bellard
        return 0;
5862 158142c2 bellard
    }
5863 158142c2 bellard
    return
5864 158142c2 bellard
           ( a.low == b.low )
5865 158142c2 bellard
        && (    ( a.high == b.high )
5866 158142c2 bellard
             || (    ( a.low == 0 )
5867 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5868 158142c2 bellard
           );
5869 158142c2 bellard
5870 158142c2 bellard
}
5871 158142c2 bellard
5872 158142c2 bellard
/*----------------------------------------------------------------------------
5873 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5874 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5875 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5876 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5877 158142c2 bellard
*----------------------------------------------------------------------------*/
5878 158142c2 bellard
5879 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5880 158142c2 bellard
{
5881 158142c2 bellard
    flag aSign, bSign;
5882 158142c2 bellard
5883 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5884 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5885 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5886 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5887 158142c2 bellard
       ) {
5888 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5889 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5890 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5891 158142c2 bellard
        }
5892 158142c2 bellard
        return 0;
5893 158142c2 bellard
    }
5894 158142c2 bellard
    aSign = extractFloat128Sign( a );
5895 158142c2 bellard
    bSign = extractFloat128Sign( b );
5896 158142c2 bellard
    if ( aSign != bSign ) {
5897 158142c2 bellard
        return
5898 158142c2 bellard
               aSign
5899 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5900 158142c2 bellard
                 == 0 );
5901 158142c2 bellard
    }
5902 158142c2 bellard
    return
5903 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5904 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5905 158142c2 bellard
5906 158142c2 bellard
}
5907 158142c2 bellard
5908 158142c2 bellard
/*----------------------------------------------------------------------------
5909 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5910 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5911 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5912 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5913 158142c2 bellard
*----------------------------------------------------------------------------*/
5914 158142c2 bellard
5915 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5916 158142c2 bellard
{
5917 158142c2 bellard
    flag aSign, bSign;
5918 158142c2 bellard
5919 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5920 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5921 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5922 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5923 158142c2 bellard
       ) {
5924 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5925 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5926 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5927 158142c2 bellard
        }
5928 158142c2 bellard
        return 0;
5929 158142c2 bellard
    }
5930 158142c2 bellard
    aSign = extractFloat128Sign( a );
5931 158142c2 bellard
    bSign = extractFloat128Sign( b );
5932 158142c2 bellard
    if ( aSign != bSign ) {
5933 158142c2 bellard
        return
5934 158142c2 bellard
               aSign
5935 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5936 158142c2 bellard
                 != 0 );
5937 158142c2 bellard
    }
5938 158142c2 bellard
    return
5939 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5940 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5941 158142c2 bellard
5942 158142c2 bellard
}
5943 158142c2 bellard
5944 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5945 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5946 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
5947 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
5948 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
5949 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5950 67b7861d Aurelien Jarno
5951 67b7861d Aurelien Jarno
int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
5952 67b7861d Aurelien Jarno
{
5953 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5954 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5955 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5956 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5957 67b7861d Aurelien Jarno
       ) {
5958 67b7861d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
5959 67b7861d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
5960 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5961 67b7861d Aurelien Jarno
        }
5962 67b7861d Aurelien Jarno
        return 1;
5963 67b7861d Aurelien Jarno
    }
5964 67b7861d Aurelien Jarno
    return 0;
5965 67b7861d Aurelien Jarno
}
5966 67b7861d Aurelien Jarno
5967 1d6bda35 bellard
/* misc functions */
5968 1d6bda35 bellard
/* Converts the unsigned integer `a' to single precision by widening it to
 * 64 bits and handing it to int64_to_float32(). */
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float32( widened STATUS_VAR );
}
5972 1d6bda35 bellard
5973 1d6bda35 bellard
/* Converts the unsigned integer `a' to double precision by widening it to
 * 64 bits and handing it to int64_to_float64(). */
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float64( widened STATUS_VAR );
}
5977 1d6bda35 bellard
5978 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer.
 * The value is first converted with float32_to_int64(); results outside
 * [0, 0xffffffff] raise the invalid exception and saturate to the nearest
 * bound (0 for negative results, 0xffffffff for too-large ones). */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
5995 1d6bda35 bellard
5996 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer.
 * The value is first converted with float32_to_int64_round_to_zero();
 * results outside [0, 0xffffffff] raise the invalid exception and saturate
 * to the nearest bound. */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
6013 1d6bda35 bellard
6014 cbcef455 Peter Maydell
/* Converts the single-precision value `a' to an unsigned 16-bit integer
 * (returned in an unsigned int).  The value is first converted with
 * float32_to_int64_round_to_zero(); results outside [0, 0xffff] raise the
 * invalid exception and saturate to the nearest bound. */
unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffff;
}
6031 cbcef455 Peter Maydell
6032 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer.
 * The value is first converted with float64_to_int64(); results outside
 * [0, 0xffffffff] raise the invalid exception and saturate to the nearest
 * bound (0 for negative results, 0xffffffff for too-large ones). */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
6049 1d6bda35 bellard
6050 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer.
 * The value is first converted with float64_to_int64_round_to_zero();
 * results outside [0, 0xffffffff] raise the invalid exception and saturate
 * to the nearest bound. */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
6067 1d6bda35 bellard
6068 cbcef455 Peter Maydell
/* Converts the double-precision value `a' to an unsigned 16-bit integer
 * (returned in an unsigned int).  The value is first converted with
 * float64_to_int64_round_to_zero(); results outside [0, 0xffff] raise the
 * invalid exception and saturate to the nearest bound. */
unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero( a STATUS_VAR );

    if ( wide >= 0 && wide <= 0xffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffff;
}
6085 cbcef455 Peter Maydell
6086 f090c9d4 pbrook
/* FIXME: This looks broken.  */
6087 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6088 75d62a58 j_mayer
{
6089 75d62a58 j_mayer
    int64_t v;
6090 75d62a58 j_mayer
6091 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6092 f090c9d4 pbrook
    v += float64_val(a);
6093 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
6094 75d62a58 j_mayer
6095 75d62a58 j_mayer
    return v - INT64_MIN;
6096 75d62a58 j_mayer
}
6097 75d62a58 j_mayer
6098 75d62a58 j_mayer
/* Intended to convert the double-precision value `a' to an unsigned 64-bit
 * integer by biasing it by -2^63, converting with
 * float64_to_int64_round_to_zero() and removing the bias again from the
 * integer result.
 *
 * NOTE(review): the bias step adds the values returned by float64_val() as
 * plain integers.  If float64_val() yields the raw encoding, that sum is
 * not the encoding of `a - 2^63', so the result is presumably wrong for
 * most inputs -- confirm and replace with a real conversion.
 *
 * All wrap-around arithmetic is carried out on uint64_t: subtracting
 * INT64_MIN from a non-negative int64_t would overflow, which is undefined
 * behaviour in C.
 */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    uint64_t biased;
    int64_t v;

    biased = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
    biased += float64_val(a);
    v = float64_to_int64_round_to_zero(make_float64(biased) STATUS_VAR);

    /* Remove the bias modulo 2^64. */
    return (uint64_t)v - (uint64_t)INT64_MIN;
}
6108 75d62a58 j_mayer
6109 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
6110 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
6111 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
6112 1d6bda35 bellard
{                                                                            \
6113 1d6bda35 bellard
    flag aSign, bSign;                                                       \
6114 bb98fe42 Andreas Färber
    uint ## s ## _t av, bv;                                                  \
6115 37d18660 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
6116 37d18660 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
6117 1d6bda35 bellard
                                                                             \
6118 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
6119 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
6120 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
6121 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
6122 1d6bda35 bellard
        if (!is_quiet ||                                                     \
6123 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
6124 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
6125 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
6126 1d6bda35 bellard
        }                                                                    \
6127 1d6bda35 bellard
        return float_relation_unordered;                                     \
6128 1d6bda35 bellard
    }                                                                        \
6129 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
6130 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
6131 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
6132 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
6133 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
6134 bb98fe42 Andreas Färber
        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
6135 1d6bda35 bellard
            /* zero case */                                                  \
6136 1d6bda35 bellard
            return float_relation_equal;                                     \
6137 1d6bda35 bellard
        } else {                                                             \
6138 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
6139 1d6bda35 bellard
        }                                                                    \
6140 1d6bda35 bellard
    } else {                                                                 \
6141 f090c9d4 pbrook
        if (av == bv) {                                                      \
6142 1d6bda35 bellard
            return float_relation_equal;                                     \
6143 1d6bda35 bellard
        } else {                                                             \
6144 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
6145 1d6bda35 bellard
        }                                                                    \
6146 1d6bda35 bellard
    }                                                                        \
6147 1d6bda35 bellard
}                                                                            \
6148 1d6bda35 bellard
                                                                             \
6149 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
6150 1d6bda35 bellard
{                                                                            \
6151 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
6152 1d6bda35 bellard
}                                                                            \
6153 1d6bda35 bellard
                                                                             \
6154 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
6155 1d6bda35 bellard
{                                                                            \
6156 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
6157 1d6bda35 bellard
}
6158 1d6bda35 bellard
6159 1d6bda35 bellard
COMPARE(32, 0xff)
6160 1d6bda35 bellard
COMPARE(64, 0x7ff)
6161 9ee6e8bb pbrook
6162 f6714d36 Aurelien Jarno
INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6163 f6714d36 Aurelien Jarno
                                      int is_quiet STATUS_PARAM )
6164 f6714d36 Aurelien Jarno
{
6165 f6714d36 Aurelien Jarno
    flag aSign, bSign;
6166 f6714d36 Aurelien Jarno
6167 f6714d36 Aurelien Jarno
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6168 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( a )<<1 ) ) ||
6169 f6714d36 Aurelien Jarno
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6170 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( b )<<1 ) )) {
6171 f6714d36 Aurelien Jarno
        if (!is_quiet ||
6172 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( a ) ||
6173 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( b ) ) {
6174 f6714d36 Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6175 f6714d36 Aurelien Jarno
        }
6176 f6714d36 Aurelien Jarno
        return float_relation_unordered;
6177 f6714d36 Aurelien Jarno
    }
6178 f6714d36 Aurelien Jarno
    aSign = extractFloatx80Sign( a );
6179 f6714d36 Aurelien Jarno
    bSign = extractFloatx80Sign( b );
6180 f6714d36 Aurelien Jarno
    if ( aSign != bSign ) {
6181 f6714d36 Aurelien Jarno
6182 f6714d36 Aurelien Jarno
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6183 f6714d36 Aurelien Jarno
             ( ( a.low | b.low ) == 0 ) ) {
6184 f6714d36 Aurelien Jarno
            /* zero case */
6185 f6714d36 Aurelien Jarno
            return float_relation_equal;
6186 f6714d36 Aurelien Jarno
        } else {
6187 f6714d36 Aurelien Jarno
            return 1 - (2 * aSign);
6188 f6714d36 Aurelien Jarno
        }
6189 f6714d36 Aurelien Jarno
    } else {
6190 f6714d36 Aurelien Jarno
        if (a.low == b.low && a.high == b.high) {
6191 f6714d36 Aurelien Jarno
            return float_relation_equal;
6192 f6714d36 Aurelien Jarno
        } else {
6193 f6714d36 Aurelien Jarno
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6194 f6714d36 Aurelien Jarno
        }
6195 f6714d36 Aurelien Jarno
    }
6196 f6714d36 Aurelien Jarno
}
6197 f6714d36 Aurelien Jarno
6198 f6714d36 Aurelien Jarno
int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
6199 f6714d36 Aurelien Jarno
{
6200 f6714d36 Aurelien Jarno
    return floatx80_compare_internal(a, b, 0 STATUS_VAR);
6201 f6714d36 Aurelien Jarno
}
6202 f6714d36 Aurelien Jarno
6203 f6714d36 Aurelien Jarno
int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
6204 f6714d36 Aurelien Jarno
{
6205 f6714d36 Aurelien Jarno
    return floatx80_compare_internal(a, b, 1 STATUS_VAR);
6206 f6714d36 Aurelien Jarno
}
6207 f6714d36 Aurelien Jarno
6208 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
6209 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
6210 1f587329 blueswir1
{
6211 1f587329 blueswir1
    flag aSign, bSign;
6212 1f587329 blueswir1
6213 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6214 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6215 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6216 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6217 1f587329 blueswir1
        if (!is_quiet ||
6218 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
6219 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
6220 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
6221 1f587329 blueswir1
        }
6222 1f587329 blueswir1
        return float_relation_unordered;
6223 1f587329 blueswir1
    }
6224 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
6225 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
6226 1f587329 blueswir1
    if ( aSign != bSign ) {
6227 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6228 1f587329 blueswir1
            /* zero case */
6229 1f587329 blueswir1
            return float_relation_equal;
6230 1f587329 blueswir1
        } else {
6231 1f587329 blueswir1
            return 1 - (2 * aSign);
6232 1f587329 blueswir1
        }
6233 1f587329 blueswir1
    } else {
6234 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
6235 1f587329 blueswir1
            return float_relation_equal;
6236 1f587329 blueswir1
        } else {
6237 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6238 1f587329 blueswir1
        }
6239 1f587329 blueswir1
    }
6240 1f587329 blueswir1
}
6241 1f587329 blueswir1
6242 1f587329 blueswir1
int float128_compare( float128 a, float128 b STATUS_PARAM )
6243 1f587329 blueswir1
{
6244 1f587329 blueswir1
    return float128_compare_internal(a, b, 0 STATUS_VAR);
6245 1f587329 blueswir1
}
6246 1f587329 blueswir1
6247 1f587329 blueswir1
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
6248 1f587329 blueswir1
{
6249 1f587329 blueswir1
    return float128_compare_internal(a, b, 1 STATUS_VAR);
6250 1f587329 blueswir1
}
6251 1f587329 blueswir1
6252 274f1b04 Peter Maydell
/* min() and max() functions. These can't be implemented as
6253 274f1b04 Peter Maydell
 * 'compare and pick one input' because that would mishandle
6254 274f1b04 Peter Maydell
 * NaNs and +0 vs -0.
6255 274f1b04 Peter Maydell
 */
6256 274f1b04 Peter Maydell
#define MINMAX(s, nan_exp)                                              \
6257 274f1b04 Peter Maydell
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
6258 274f1b04 Peter Maydell
                                        int ismin STATUS_PARAM )        \
6259 274f1b04 Peter Maydell
{                                                                       \
6260 274f1b04 Peter Maydell
    flag aSign, bSign;                                                  \
6261 274f1b04 Peter Maydell
    uint ## s ## _t av, bv;                                             \
6262 274f1b04 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
6263 274f1b04 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
6264 274f1b04 Peter Maydell
    if (float ## s ## _is_any_nan(a) ||                                 \
6265 274f1b04 Peter Maydell
        float ## s ## _is_any_nan(b)) {                                 \
6266 274f1b04 Peter Maydell
        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
6267 274f1b04 Peter Maydell
    }                                                                   \
6268 274f1b04 Peter Maydell
    aSign = extractFloat ## s ## Sign(a);                               \
6269 274f1b04 Peter Maydell
    bSign = extractFloat ## s ## Sign(b);                               \
6270 274f1b04 Peter Maydell
    av = float ## s ## _val(a);                                         \
6271 274f1b04 Peter Maydell
    bv = float ## s ## _val(b);                                         \
6272 274f1b04 Peter Maydell
    if (aSign != bSign) {                                               \
6273 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6274 274f1b04 Peter Maydell
            return aSign ? a : b;                                       \
6275 274f1b04 Peter Maydell
        } else {                                                        \
6276 274f1b04 Peter Maydell
            return aSign ? b : a;                                       \
6277 274f1b04 Peter Maydell
        }                                                               \
6278 274f1b04 Peter Maydell
    } else {                                                            \
6279 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6280 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? a : b;                         \
6281 274f1b04 Peter Maydell
        } else {                                                        \
6282 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? b : a;                         \
6283 274f1b04 Peter Maydell
        }                                                               \
6284 274f1b04 Peter Maydell
    }                                                                   \
6285 274f1b04 Peter Maydell
}                                                                       \
6286 274f1b04 Peter Maydell
                                                                        \
6287 274f1b04 Peter Maydell
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
6288 274f1b04 Peter Maydell
{                                                                       \
6289 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
6290 274f1b04 Peter Maydell
}                                                                       \
6291 274f1b04 Peter Maydell
                                                                        \
6292 274f1b04 Peter Maydell
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
6293 274f1b04 Peter Maydell
{                                                                       \
6294 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
6295 274f1b04 Peter Maydell
}
6296 274f1b04 Peter Maydell
6297 274f1b04 Peter Maydell
MINMAX(32, 0xff)
6298 274f1b04 Peter Maydell
MINMAX(64, 0x7ff)
6299 274f1b04 Peter Maydell
6300 274f1b04 Peter Maydell
6301 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
6302 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6303 9ee6e8bb pbrook
{
6304 9ee6e8bb pbrook
    flag aSign;
6305 326b9e98 Aurelien Jarno
    int16_t aExp;
6306 bb98fe42 Andreas Färber
    uint32_t aSig;
6307 9ee6e8bb pbrook
6308 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
6309 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
6310 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
6311 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
6312 9ee6e8bb pbrook
6313 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
6314 326b9e98 Aurelien Jarno
        if ( aSig ) {
6315 326b9e98 Aurelien Jarno
            return propagateFloat32NaN( a, a STATUS_VAR );
6316 326b9e98 Aurelien Jarno
        }
6317 9ee6e8bb pbrook
        return a;
6318 9ee6e8bb pbrook
    }
6319 69397542 pbrook
    if ( aExp != 0 )
6320 69397542 pbrook
        aSig |= 0x00800000;
6321 69397542 pbrook
    else if ( aSig == 0 )
6322 69397542 pbrook
        return a;
6323 69397542 pbrook
6324 326b9e98 Aurelien Jarno
    if (n > 0x200) {
6325 326b9e98 Aurelien Jarno
        n = 0x200;
6326 326b9e98 Aurelien Jarno
    } else if (n < -0x200) {
6327 326b9e98 Aurelien Jarno
        n = -0x200;
6328 326b9e98 Aurelien Jarno
    }
6329 326b9e98 Aurelien Jarno
6330 69397542 pbrook
    aExp += n - 1;
6331 69397542 pbrook
    aSig <<= 7;
6332 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
6333 9ee6e8bb pbrook
}
6334 9ee6e8bb pbrook
6335 9ee6e8bb pbrook
/* Multiply the double-precision value `a' by 2 raised to the power `n'.
 *
 * The input is first passed through float64_squash_input_denormal().
 * A NaN operand is handed to propagateFloat64NaN(); infinities and zeros
 * are returned unchanged.  `n' is clamped to [-0x1000, 0x1000] before it
 * is added to the 16-bit exponent so that the sum stays within int16_t
 * range.  The final value is produced by normalizeRoundAndPackFloat64().
 */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16_t aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        if ( aSig ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: restore the implicit leading one. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        /* Zero scales to itself. */
        return a;
    } else {
        /* Subnormal: there is no implicit bit, and the fraction is
         * weighted as if the exponent field were 1, not 0.  Without
         * this adjustment every subnormal result is half of the
         * correct value.
         */
        aExp++;
    }

    if (n > 0x1000) {
        n = 0x1000;
    } else if (n < -0x1000) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
}
6367 9ee6e8bb pbrook
6368 9ee6e8bb pbrook
/* Multiply the extended-precision value `a' by 2 raised to the power `n'.
 *
 * A NaN operand is handed to propagateFloatx80NaN(); infinities and zeros
 * are returned unchanged.  `n' is clamped to [-0x10000, 0x10000] before it
 * is added to the exponent.  The final value is produced by
 * normalizeRoundAndPackFloatx80() using the rounding precision stored in
 * the status.
 */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* The top bit of the significand is the explicit integer bit;
         * only the bits below it distinguish NaN from infinity.
         */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Zero scales to itself. */
            return a;
        }
        /* Nonzero significand with a zero exponent field: the value is
         * weighted as if the exponent field were 1, not 0.  Without
         * this adjustment the result is half of the correct value.
         */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
}
6398 9ee6e8bb pbrook
6399 9ee6e8bb pbrook
/* Multiply the quadruple-precision value `a' by 2 raised to the power `n'.
 *
 * A NaN operand is handed to propagateFloat128NaN(); infinities and zeros
 * are returned unchanged.  `n' is clamped to [-0x10000, 0x10000] before it
 * is added to the exponent.  The final value is produced by
 * normalizeRoundAndPackFloat128().
 */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: restore the implicit leading one. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        /* Zero scales to itself. */
        return a;
    } else {
        /* Subnormal: there is no implicit bit, and the fraction is
         * weighted as if the exponent field were 1, not 0.  Without
         * this adjustment every subnormal result is half of the
         * correct value.
         */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );
}