root / target-arm / nwfpe / softfloat.c @ d1d9f421
History | View | Annotate | Download (114.7 kB)
1 | 00406dff | bellard | /*
|
---|---|---|---|
2 | 00406dff | bellard | ===============================================================================
|
3 | 00406dff | bellard | |
4 | 00406dff | bellard | This C source file is part of the SoftFloat IEC/IEEE Floating-point
|
5 | 00406dff | bellard | Arithmetic Package, Release 2.
|
6 | 00406dff | bellard | |
7 | 00406dff | bellard | Written by John R. Hauser. This work was made possible in part by the
|
8 | 00406dff | bellard | International Computer Science Institute, located at Suite 600, 1947 Center
|
9 | 00406dff | bellard | Street, Berkeley, California 94704. Funding was partially provided by the
|
10 | 00406dff | bellard | National Science Foundation under grant MIP-9311980. The original version
|
11 | 00406dff | bellard | of this code was written as part of a project to build a fixed-point vector
|
12 | 00406dff | bellard | processor in collaboration with the University of California at Berkeley,
|
13 | 00406dff | bellard | overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
14 | 00406dff | bellard | is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
|
15 | 00406dff | bellard | arithmetic/softfloat.html'.
|
16 | 00406dff | bellard | |
17 | 00406dff | bellard | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
18 | 00406dff | bellard | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
|
19 | 00406dff | bellard | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
|
20 | 00406dff | bellard | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
|
21 | 00406dff | bellard | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
|
22 | 00406dff | bellard | |
23 | 00406dff | bellard | Derivative works are acceptable, even for commercial purposes, so long as
|
24 | 00406dff | bellard | (1) they include prominent notice that the work is derivative, and (2) they
|
25 | 00406dff | bellard | include prominent notice akin to these three paragraphs for those parts of
|
26 | 00406dff | bellard | this code that are retained.
|
27 | 00406dff | bellard | |
28 | 00406dff | bellard | ===============================================================================
|
29 | 00406dff | bellard | */
|
30 | 00406dff | bellard | |
31 | 00406dff | bellard | #include "fpa11.h" |
32 | 00406dff | bellard | #include "milieu.h" |
33 | 00406dff | bellard | #include "softfloat.h" |
34 | 00406dff | bellard | |
35 | 00406dff | bellard | /*
|
36 | 00406dff | bellard | -------------------------------------------------------------------------------
|
37 | 00406dff | bellard | Floating-point rounding mode, extended double-precision rounding precision,
|
38 | 00406dff | bellard | and exception flags.
|
39 | 00406dff | bellard | -------------------------------------------------------------------------------
|
40 | 00406dff | bellard | */
|
41 | 00406dff | bellard | int8 float_rounding_mode = float_round_nearest_even; |
42 | 00406dff | bellard | int8 floatx80_rounding_precision = 80;
|
43 | 00406dff | bellard | int8 float_exception_flags; |
44 | 00406dff | bellard | |
45 | 00406dff | bellard | /*
|
46 | 00406dff | bellard | -------------------------------------------------------------------------------
|
47 | 00406dff | bellard | Primitive arithmetic functions, including multi-word arithmetic, and
|
48 | 00406dff | bellard | division and square root approximations. (Can be specialized to target if
|
49 | 00406dff | bellard | desired.)
|
50 | 00406dff | bellard | -------------------------------------------------------------------------------
|
51 | 00406dff | bellard | */
|
52 | 00406dff | bellard | #include "softfloat-macros" |
53 | 00406dff | bellard | |
54 | 00406dff | bellard | /*
|
55 | 00406dff | bellard | -------------------------------------------------------------------------------
|
56 | 00406dff | bellard | Functions and definitions to determine: (1) whether tininess for underflow
|
57 | 00406dff | bellard | is detected before or after rounding by default, (2) what (if anything)
|
58 | 00406dff | bellard | happens when exceptions are raised, (3) how signaling NaNs are distinguished
|
59 | 00406dff | bellard | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
|
60 | 00406dff | bellard | are propagated from function inputs to output. These details are target-
|
61 | 00406dff | bellard | specific.
|
62 | 00406dff | bellard | -------------------------------------------------------------------------------
|
63 | 00406dff | bellard | */
|
64 | 00406dff | bellard | #include "softfloat-specialize" |
65 | 00406dff | bellard | |
66 | 00406dff | bellard | /*
|
67 | 00406dff | bellard | -------------------------------------------------------------------------------
|
68 | 00406dff | bellard | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
|
69 | 00406dff | bellard | and 7, and returns the properly rounded 32-bit integer corresponding to the
|
70 | 00406dff | bellard | input. If `zSign' is nonzero, the input is negated before being converted
|
71 | 00406dff | bellard | to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
|
72 | 00406dff | bellard | input is simply rounded to an integer, with the inexact exception raised if
|
73 | 00406dff | bellard | the input cannot be represented exactly as an integer. If the fixed-point
|
74 | 00406dff | bellard | input is too large, however, the invalid exception is raised and the largest
|
75 | 00406dff | bellard | positive or negative integer is returned.
|
76 | 00406dff | bellard | -------------------------------------------------------------------------------
|
77 | 00406dff | bellard | */
|
78 | 00406dff | bellard | static int32 roundAndPackInt32( flag zSign, bits64 absZ )
|
79 | 00406dff | bellard | { |
80 | 00406dff | bellard | int8 roundingMode; |
81 | 00406dff | bellard | flag roundNearestEven; |
82 | 00406dff | bellard | int8 roundIncrement, roundBits; |
83 | 00406dff | bellard | int32 z; |
84 | 00406dff | bellard | |
85 | 00406dff | bellard | roundingMode = float_rounding_mode; |
86 | 00406dff | bellard | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
87 | 00406dff | bellard | roundIncrement = 0x40;
|
88 | 00406dff | bellard | if ( ! roundNearestEven ) {
|
89 | 00406dff | bellard | if ( roundingMode == float_round_to_zero ) {
|
90 | 00406dff | bellard | roundIncrement = 0;
|
91 | 00406dff | bellard | } |
92 | 00406dff | bellard | else {
|
93 | 00406dff | bellard | roundIncrement = 0x7F;
|
94 | 00406dff | bellard | if ( zSign ) {
|
95 | 00406dff | bellard | if ( roundingMode == float_round_up ) roundIncrement = 0; |
96 | 00406dff | bellard | } |
97 | 00406dff | bellard | else {
|
98 | 00406dff | bellard | if ( roundingMode == float_round_down ) roundIncrement = 0; |
99 | 00406dff | bellard | } |
100 | 00406dff | bellard | } |
101 | 00406dff | bellard | } |
102 | 00406dff | bellard | roundBits = absZ & 0x7F;
|
103 | 00406dff | bellard | absZ = ( absZ + roundIncrement )>>7;
|
104 | 00406dff | bellard | absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); |
105 | 00406dff | bellard | z = absZ; |
106 | 00406dff | bellard | if ( zSign ) z = - z;
|
107 | 00406dff | bellard | if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { |
108 | 00406dff | bellard | float_exception_flags |= float_flag_invalid; |
109 | 00406dff | bellard | return zSign ? 0x80000000 : 0x7FFFFFFF; |
110 | 00406dff | bellard | } |
111 | 00406dff | bellard | if ( roundBits ) float_exception_flags |= float_flag_inexact;
|
112 | 00406dff | bellard | return z;
|
113 | 00406dff | bellard | |
114 | 00406dff | bellard | } |
115 | 00406dff | bellard | |
116 | 00406dff | bellard | /*
|
117 | 00406dff | bellard | -------------------------------------------------------------------------------
|
118 | 00406dff | bellard | Returns the fraction bits of the single-precision floating-point value `a'.
|
119 | 00406dff | bellard | -------------------------------------------------------------------------------
|
120 | 00406dff | bellard | */
|
121 | 00406dff | bellard | INLINE bits32 extractFloat32Frac( float32 a ) |
122 | 00406dff | bellard | { |
123 | 00406dff | bellard | |
124 | 00406dff | bellard | return a & 0x007FFFFF; |
125 | 00406dff | bellard | |
126 | 00406dff | bellard | } |
127 | 00406dff | bellard | |
128 | 00406dff | bellard | /*
|
129 | 00406dff | bellard | -------------------------------------------------------------------------------
|
130 | 00406dff | bellard | Returns the exponent bits of the single-precision floating-point value `a'.
|
131 | 00406dff | bellard | -------------------------------------------------------------------------------
|
132 | 00406dff | bellard | */
|
133 | 00406dff | bellard | INLINE int16 extractFloat32Exp( float32 a ) |
134 | 00406dff | bellard | { |
135 | 00406dff | bellard | |
136 | 00406dff | bellard | return ( a>>23 ) & 0xFF; |
137 | 00406dff | bellard | |
138 | 00406dff | bellard | } |
139 | 00406dff | bellard | |
140 | 00406dff | bellard | /*
|
141 | 00406dff | bellard | -------------------------------------------------------------------------------
|
142 | 00406dff | bellard | Returns the sign bit of the single-precision floating-point value `a'.
|
143 | 00406dff | bellard | -------------------------------------------------------------------------------
|
144 | 00406dff | bellard | */
|
145 | 00406dff | bellard | INLINE flag extractFloat32Sign( float32 a ) |
146 | 00406dff | bellard | { |
147 | 00406dff | bellard | |
148 | 00406dff | bellard | return a>>31; |
149 | 00406dff | bellard | |
150 | 00406dff | bellard | } |
151 | 00406dff | bellard | |
152 | 00406dff | bellard | /*
|
153 | 00406dff | bellard | -------------------------------------------------------------------------------
|
154 | 00406dff | bellard | Normalizes the subnormal single-precision floating-point value represented
|
155 | 00406dff | bellard | by the denormalized significand `aSig'. The normalized exponent and
|
156 | 00406dff | bellard | significand are stored at the locations pointed to by `zExpPtr' and
|
157 | 00406dff | bellard | `zSigPtr', respectively.
|
158 | 00406dff | bellard | -------------------------------------------------------------------------------
|
159 | 00406dff | bellard | */
|
160 | 00406dff | bellard | static void |
161 | 00406dff | bellard | normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) |
162 | 00406dff | bellard | { |
163 | 00406dff | bellard | int8 shiftCount; |
164 | 00406dff | bellard | |
165 | 00406dff | bellard | shiftCount = countLeadingZeros32( aSig ) - 8;
|
166 | 00406dff | bellard | *zSigPtr = aSig<<shiftCount; |
167 | 00406dff | bellard | *zExpPtr = 1 - shiftCount;
|
168 | 00406dff | bellard | |
169 | 00406dff | bellard | } |
170 | 00406dff | bellard | |
171 | 00406dff | bellard | /*
|
172 | 00406dff | bellard | -------------------------------------------------------------------------------
|
173 | 00406dff | bellard | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
174 | 00406dff | bellard | single-precision floating-point value, returning the result. After being
|
175 | 00406dff | bellard | shifted into the proper positions, the three fields are simply added
|
176 | 00406dff | bellard | together to form the result. This means that any integer portion of `zSig'
|
177 | 00406dff | bellard | will be added into the exponent. Since a properly normalized significand
|
178 | 00406dff | bellard | will have an integer portion equal to 1, the `zExp' input should be 1 less
|
179 | 00406dff | bellard | than the desired result exponent whenever `zSig' is a complete, normalized
|
180 | 00406dff | bellard | significand.
|
181 | 00406dff | bellard | -------------------------------------------------------------------------------
|
182 | 00406dff | bellard | */
|
183 | 00406dff | bellard | INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) |
184 | 00406dff | bellard | { |
185 | 00406dff | bellard | return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; |
186 | 00406dff | bellard | } |
187 | 00406dff | bellard | |
188 | 00406dff | bellard | /*
|
189 | 00406dff | bellard | -------------------------------------------------------------------------------
|
190 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
191 | 00406dff | bellard | and significand `zSig', and returns the proper single-precision floating-
|
192 | 00406dff | bellard | point value corresponding to the abstract input. Ordinarily, the abstract
|
193 | 00406dff | bellard | value is simply rounded and packed into the single-precision format, with
|
194 | 00406dff | bellard | the inexact exception raised if the abstract input cannot be represented
|
195 | 00406dff | bellard | exactly. If the abstract value is too large, however, the overflow and
|
196 | 00406dff | bellard | inexact exceptions are raised and an infinity or maximal finite value is
|
197 | 00406dff | bellard | returned. If the abstract value is too small, the input value is rounded to
|
198 | 00406dff | bellard | a subnormal number, and the underflow and inexact exceptions are raised if
|
199 | 00406dff | bellard | the abstract input cannot be represented exactly as a subnormal single-
|
200 | 00406dff | bellard | precision floating-point number.
|
201 | 00406dff | bellard | The input significand `zSig' has its binary point between bits 30
|
202 | 00406dff | bellard | and 29, which is 7 bits to the left of the usual location. This shifted
|
203 | 00406dff | bellard | significand must be normalized or smaller. If `zSig' is not normalized,
|
204 | 00406dff | bellard | `zExp' must be 0; in that case, the result returned is a subnormal number,
|
205 | 00406dff | bellard | and it must not require rounding. In the usual case that `zSig' is
|
206 | 00406dff | bellard | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
207 | 00406dff | bellard | The handling of underflow and overflow follows the IEC/IEEE Standard for
|
208 | 00406dff | bellard | Binary Floating-point Arithmetic.
|
209 | 00406dff | bellard | -------------------------------------------------------------------------------
|
210 | 00406dff | bellard | */
|
211 | 00406dff | bellard | static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
|
212 | 00406dff | bellard | { |
213 | 00406dff | bellard | int8 roundingMode; |
214 | 00406dff | bellard | flag roundNearestEven; |
215 | 00406dff | bellard | int8 roundIncrement, roundBits; |
216 | 00406dff | bellard | flag isTiny; |
217 | 00406dff | bellard | |
218 | 00406dff | bellard | roundingMode = float_rounding_mode; |
219 | 00406dff | bellard | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
220 | 00406dff | bellard | roundIncrement = 0x40;
|
221 | 00406dff | bellard | if ( ! roundNearestEven ) {
|
222 | 00406dff | bellard | if ( roundingMode == float_round_to_zero ) {
|
223 | 00406dff | bellard | roundIncrement = 0;
|
224 | 00406dff | bellard | } |
225 | 00406dff | bellard | else {
|
226 | 00406dff | bellard | roundIncrement = 0x7F;
|
227 | 00406dff | bellard | if ( zSign ) {
|
228 | 00406dff | bellard | if ( roundingMode == float_round_up ) roundIncrement = 0; |
229 | 00406dff | bellard | } |
230 | 00406dff | bellard | else {
|
231 | 00406dff | bellard | if ( roundingMode == float_round_down ) roundIncrement = 0; |
232 | 00406dff | bellard | } |
233 | 00406dff | bellard | } |
234 | 00406dff | bellard | } |
235 | 00406dff | bellard | roundBits = zSig & 0x7F;
|
236 | 00406dff | bellard | if ( 0xFD <= (bits16) zExp ) { |
237 | 00406dff | bellard | if ( ( 0xFD < zExp ) |
238 | 00406dff | bellard | || ( ( zExp == 0xFD )
|
239 | 00406dff | bellard | && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
|
240 | 00406dff | bellard | ) { |
241 | 00406dff | bellard | float_raise( float_flag_overflow | float_flag_inexact ); |
242 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); |
243 | 00406dff | bellard | } |
244 | 00406dff | bellard | if ( zExp < 0 ) { |
245 | 00406dff | bellard | isTiny = |
246 | 00406dff | bellard | ( float_detect_tininess == float_tininess_before_rounding ) |
247 | 00406dff | bellard | || ( zExp < -1 )
|
248 | 00406dff | bellard | || ( zSig + roundIncrement < 0x80000000 );
|
249 | 00406dff | bellard | shift32RightJamming( zSig, - zExp, &zSig ); |
250 | 00406dff | bellard | zExp = 0;
|
251 | 00406dff | bellard | roundBits = zSig & 0x7F;
|
252 | 00406dff | bellard | if ( isTiny && roundBits ) float_raise( float_flag_underflow );
|
253 | 00406dff | bellard | } |
254 | 00406dff | bellard | } |
255 | 00406dff | bellard | if ( roundBits ) float_exception_flags |= float_flag_inexact;
|
256 | 00406dff | bellard | zSig = ( zSig + roundIncrement )>>7;
|
257 | 00406dff | bellard | zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); |
258 | 00406dff | bellard | if ( zSig == 0 ) zExp = 0; |
259 | 00406dff | bellard | return packFloat32( zSign, zExp, zSig );
|
260 | 00406dff | bellard | |
261 | 00406dff | bellard | } |
262 | 00406dff | bellard | |
263 | 00406dff | bellard | /*
|
264 | 00406dff | bellard | -------------------------------------------------------------------------------
|
265 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
266 | 00406dff | bellard | and significand `zSig', and returns the proper single-precision floating-
|
267 | 00406dff | bellard | point value corresponding to the abstract input. This routine is just like
|
268 | 00406dff | bellard | `roundAndPackFloat32' except that `zSig' does not have to be normalized in
|
269 | 00406dff | bellard | any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
|
270 | 00406dff | bellard | point exponent.
|
271 | 00406dff | bellard | -------------------------------------------------------------------------------
|
272 | 00406dff | bellard | */
|
273 | 00406dff | bellard | static float32
|
274 | 00406dff | bellard | normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) |
275 | 00406dff | bellard | { |
276 | 00406dff | bellard | int8 shiftCount; |
277 | 00406dff | bellard | |
278 | 00406dff | bellard | shiftCount = countLeadingZeros32( zSig ) - 1;
|
279 | 00406dff | bellard | return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
|
280 | 00406dff | bellard | |
281 | 00406dff | bellard | } |
282 | 00406dff | bellard | |
283 | 00406dff | bellard | /*
|
284 | 00406dff | bellard | -------------------------------------------------------------------------------
|
285 | 00406dff | bellard | Returns the fraction bits of the double-precision floating-point value `a'.
|
286 | 00406dff | bellard | -------------------------------------------------------------------------------
|
287 | 00406dff | bellard | */
|
288 | 00406dff | bellard | INLINE bits64 extractFloat64Frac( float64 a ) |
289 | 00406dff | bellard | { |
290 | 00406dff | bellard | |
291 | 00406dff | bellard | return a & LIT64( 0x000FFFFFFFFFFFFF ); |
292 | 00406dff | bellard | |
293 | 00406dff | bellard | } |
294 | 00406dff | bellard | |
295 | 00406dff | bellard | /*
|
296 | 00406dff | bellard | -------------------------------------------------------------------------------
|
297 | 00406dff | bellard | Returns the exponent bits of the double-precision floating-point value `a'.
|
298 | 00406dff | bellard | -------------------------------------------------------------------------------
|
299 | 00406dff | bellard | */
|
300 | 00406dff | bellard | INLINE int16 extractFloat64Exp( float64 a ) |
301 | 00406dff | bellard | { |
302 | 00406dff | bellard | |
303 | 00406dff | bellard | return ( a>>52 ) & 0x7FF; |
304 | 00406dff | bellard | |
305 | 00406dff | bellard | } |
306 | 00406dff | bellard | |
307 | 00406dff | bellard | /*
|
308 | 00406dff | bellard | -------------------------------------------------------------------------------
|
309 | 00406dff | bellard | Returns the sign bit of the double-precision floating-point value `a'.
|
310 | 00406dff | bellard | -------------------------------------------------------------------------------
|
311 | 00406dff | bellard | */
|
312 | 00406dff | bellard | INLINE flag extractFloat64Sign( float64 a ) |
313 | 00406dff | bellard | { |
314 | 00406dff | bellard | |
315 | 00406dff | bellard | return a>>63; |
316 | 00406dff | bellard | |
317 | 00406dff | bellard | } |
318 | 00406dff | bellard | |
319 | 00406dff | bellard | /*
|
320 | 00406dff | bellard | -------------------------------------------------------------------------------
|
321 | 00406dff | bellard | Normalizes the subnormal double-precision floating-point value represented
|
322 | 00406dff | bellard | by the denormalized significand `aSig'. The normalized exponent and
|
323 | 00406dff | bellard | significand are stored at the locations pointed to by `zExpPtr' and
|
324 | 00406dff | bellard | `zSigPtr', respectively.
|
325 | 00406dff | bellard | -------------------------------------------------------------------------------
|
326 | 00406dff | bellard | */
|
327 | 00406dff | bellard | static void |
328 | 00406dff | bellard | normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) |
329 | 00406dff | bellard | { |
330 | 00406dff | bellard | int8 shiftCount; |
331 | 00406dff | bellard | |
332 | 00406dff | bellard | shiftCount = countLeadingZeros64( aSig ) - 11;
|
333 | 00406dff | bellard | *zSigPtr = aSig<<shiftCount; |
334 | 00406dff | bellard | *zExpPtr = 1 - shiftCount;
|
335 | 00406dff | bellard | |
336 | 00406dff | bellard | } |
337 | 00406dff | bellard | |
338 | 00406dff | bellard | /*
|
339 | 00406dff | bellard | -------------------------------------------------------------------------------
|
340 | 00406dff | bellard | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
341 | 00406dff | bellard | double-precision floating-point value, returning the result. After being
|
342 | 00406dff | bellard | shifted into the proper positions, the three fields are simply added
|
343 | 00406dff | bellard | together to form the result. This means that any integer portion of `zSig'
|
344 | 00406dff | bellard | will be added into the exponent. Since a properly normalized significand
|
345 | 00406dff | bellard | will have an integer portion equal to 1, the `zExp' input should be 1 less
|
346 | 00406dff | bellard | than the desired result exponent whenever `zSig' is a complete, normalized
|
347 | 00406dff | bellard | significand.
|
348 | 00406dff | bellard | -------------------------------------------------------------------------------
|
349 | 00406dff | bellard | */
|
350 | 00406dff | bellard | INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) |
351 | 00406dff | bellard | { |
352 | 00406dff | bellard | |
353 | 00406dff | bellard | return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; |
354 | 00406dff | bellard | |
355 | 00406dff | bellard | } |
356 | 00406dff | bellard | |
357 | 00406dff | bellard | /*
|
358 | 00406dff | bellard | -------------------------------------------------------------------------------
|
359 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
360 | 00406dff | bellard | and significand `zSig', and returns the proper double-precision floating-
|
361 | 00406dff | bellard | point value corresponding to the abstract input. Ordinarily, the abstract
|
362 | 00406dff | bellard | value is simply rounded and packed into the double-precision format, with
|
363 | 00406dff | bellard | the inexact exception raised if the abstract input cannot be represented
|
364 | 00406dff | bellard | exactly. If the abstract value is too large, however, the overflow and
|
365 | 00406dff | bellard | inexact exceptions are raised and an infinity or maximal finite value is
|
366 | 00406dff | bellard | returned. If the abstract value is too small, the input value is rounded to
|
367 | 00406dff | bellard | a subnormal number, and the underflow and inexact exceptions are raised if
|
368 | 00406dff | bellard | the abstract input cannot be represented exactly as a subnormal double-
|
369 | 00406dff | bellard | precision floating-point number.
|
370 | 00406dff | bellard | The input significand `zSig' has its binary point between bits 62
|
371 | 00406dff | bellard | and 61, which is 10 bits to the left of the usual location. This shifted
|
372 | 00406dff | bellard | significand must be normalized or smaller. If `zSig' is not normalized,
|
373 | 00406dff | bellard | `zExp' must be 0; in that case, the result returned is a subnormal number,
|
374 | 00406dff | bellard | and it must not require rounding. In the usual case that `zSig' is
|
375 | 00406dff | bellard | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
376 | 00406dff | bellard | The handling of underflow and overflow follows the IEC/IEEE Standard for
|
377 | 00406dff | bellard | Binary Floating-point Arithmetic.
|
378 | 00406dff | bellard | -------------------------------------------------------------------------------
|
379 | 00406dff | bellard | */
|
380 | 00406dff | bellard | static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
|
381 | 00406dff | bellard | { |
382 | 00406dff | bellard | int8 roundingMode; |
383 | 00406dff | bellard | flag roundNearestEven; |
384 | 00406dff | bellard | int16 roundIncrement, roundBits; |
385 | 00406dff | bellard | flag isTiny; |
386 | 00406dff | bellard | |
387 | 00406dff | bellard | roundingMode = float_rounding_mode; |
388 | 00406dff | bellard | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
389 | 00406dff | bellard | roundIncrement = 0x200;
|
390 | 00406dff | bellard | if ( ! roundNearestEven ) {
|
391 | 00406dff | bellard | if ( roundingMode == float_round_to_zero ) {
|
392 | 00406dff | bellard | roundIncrement = 0;
|
393 | 00406dff | bellard | } |
394 | 00406dff | bellard | else {
|
395 | 00406dff | bellard | roundIncrement = 0x3FF;
|
396 | 00406dff | bellard | if ( zSign ) {
|
397 | 00406dff | bellard | if ( roundingMode == float_round_up ) roundIncrement = 0; |
398 | 00406dff | bellard | } |
399 | 00406dff | bellard | else {
|
400 | 00406dff | bellard | if ( roundingMode == float_round_down ) roundIncrement = 0; |
401 | 00406dff | bellard | } |
402 | 00406dff | bellard | } |
403 | 00406dff | bellard | } |
404 | 00406dff | bellard | roundBits = zSig & 0x3FF;
|
405 | 00406dff | bellard | if ( 0x7FD <= (bits16) zExp ) { |
406 | 00406dff | bellard | if ( ( 0x7FD < zExp ) |
407 | 00406dff | bellard | || ( ( zExp == 0x7FD )
|
408 | 00406dff | bellard | && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
|
409 | 00406dff | bellard | ) { |
410 | 00406dff | bellard | //register int lr = __builtin_return_address(0);
|
411 | 00406dff | bellard | //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
|
412 | 00406dff | bellard | float_raise( float_flag_overflow | float_flag_inexact ); |
413 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); |
414 | 00406dff | bellard | } |
415 | 00406dff | bellard | if ( zExp < 0 ) { |
416 | 00406dff | bellard | isTiny = |
417 | 00406dff | bellard | ( float_detect_tininess == float_tininess_before_rounding ) |
418 | 00406dff | bellard | || ( zExp < -1 )
|
419 | 00406dff | bellard | || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
|
420 | 00406dff | bellard | shift64RightJamming( zSig, - zExp, &zSig ); |
421 | 00406dff | bellard | zExp = 0;
|
422 | 00406dff | bellard | roundBits = zSig & 0x3FF;
|
423 | 00406dff | bellard | if ( isTiny && roundBits ) float_raise( float_flag_underflow );
|
424 | 00406dff | bellard | } |
425 | 00406dff | bellard | } |
426 | 00406dff | bellard | if ( roundBits ) float_exception_flags |= float_flag_inexact;
|
427 | 00406dff | bellard | zSig = ( zSig + roundIncrement )>>10;
|
428 | 00406dff | bellard | zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); |
429 | 00406dff | bellard | if ( zSig == 0 ) zExp = 0; |
430 | 00406dff | bellard | return packFloat64( zSign, zExp, zSig );
|
431 | 00406dff | bellard | |
432 | 00406dff | bellard | } |
433 | 00406dff | bellard | |
434 | 00406dff | bellard | /*
|
435 | 00406dff | bellard | -------------------------------------------------------------------------------
|
436 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
437 | 00406dff | bellard | and significand `zSig', and returns the proper double-precision floating-
|
438 | 00406dff | bellard | point value corresponding to the abstract input. This routine is just like
|
439 | 00406dff | bellard | `roundAndPackFloat64' except that `zSig' does not have to be normalized in
|
440 | 00406dff | bellard | any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
|
441 | 00406dff | bellard | point exponent.
|
442 | 00406dff | bellard | -------------------------------------------------------------------------------
|
443 | 00406dff | bellard | */
|
444 | 00406dff | bellard | static float64
|
445 | 00406dff | bellard | normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) |
446 | 00406dff | bellard | { |
447 | 00406dff | bellard | int8 shiftCount; |
448 | 00406dff | bellard | |
449 | 00406dff | bellard | shiftCount = countLeadingZeros64( zSig ) - 1;
|
450 | 00406dff | bellard | return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
|
451 | 00406dff | bellard | |
452 | 00406dff | bellard | } |
453 | 00406dff | bellard | |
454 | 00406dff | bellard | #ifdef FLOATX80
|
455 | 00406dff | bellard | |
456 | 00406dff | bellard | /*
|
457 | 00406dff | bellard | -------------------------------------------------------------------------------
|
458 | 00406dff | bellard | Returns the fraction bits of the extended double-precision floating-point
|
459 | 00406dff | bellard | value `a'.
|
460 | 00406dff | bellard | -------------------------------------------------------------------------------
|
461 | 00406dff | bellard | */
|
462 | 00406dff | bellard | INLINE bits64 extractFloatx80Frac( floatx80 a ) |
463 | 00406dff | bellard | { |
464 | 00406dff | bellard | |
465 | 00406dff | bellard | return a.low;
|
466 | 00406dff | bellard | |
467 | 00406dff | bellard | } |
468 | 00406dff | bellard | |
469 | 00406dff | bellard | /*
|
470 | 00406dff | bellard | -------------------------------------------------------------------------------
|
471 | 00406dff | bellard | Returns the exponent bits of the extended double-precision floating-point
|
472 | 00406dff | bellard | value `a'.
|
473 | 00406dff | bellard | -------------------------------------------------------------------------------
|
474 | 00406dff | bellard | */
|
475 | 00406dff | bellard | INLINE int32 extractFloatx80Exp( floatx80 a ) |
476 | 00406dff | bellard | { |
477 | 00406dff | bellard | |
478 | 00406dff | bellard | return a.high & 0x7FFF; |
479 | 00406dff | bellard | |
480 | 00406dff | bellard | } |
481 | 00406dff | bellard | |
482 | 00406dff | bellard | /*
|
483 | 00406dff | bellard | -------------------------------------------------------------------------------
|
484 | 00406dff | bellard | Returns the sign bit of the extended double-precision floating-point value
|
485 | 00406dff | bellard | `a'.
|
486 | 00406dff | bellard | -------------------------------------------------------------------------------
|
487 | 00406dff | bellard | */
|
488 | 00406dff | bellard | INLINE flag extractFloatx80Sign( floatx80 a ) |
489 | 00406dff | bellard | { |
490 | 00406dff | bellard | |
491 | 00406dff | bellard | return a.high>>15; |
492 | 00406dff | bellard | |
493 | 00406dff | bellard | } |
494 | 00406dff | bellard | |
495 | 00406dff | bellard | /*
|
496 | 00406dff | bellard | -------------------------------------------------------------------------------
|
497 | 00406dff | bellard | Normalizes the subnormal extended double-precision floating-point value
|
498 | 00406dff | bellard | represented by the denormalized significand `aSig'. The normalized exponent
|
499 | 00406dff | bellard | and significand are stored at the locations pointed to by `zExpPtr' and
|
500 | 00406dff | bellard | `zSigPtr', respectively.
|
501 | 00406dff | bellard | -------------------------------------------------------------------------------
|
502 | 00406dff | bellard | */
|
503 | 00406dff | bellard | static void |
504 | 00406dff | bellard | normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) |
505 | 00406dff | bellard | { |
506 | 00406dff | bellard | int8 shiftCount; |
507 | 00406dff | bellard | |
508 | 00406dff | bellard | shiftCount = countLeadingZeros64( aSig ); |
509 | 00406dff | bellard | *zSigPtr = aSig<<shiftCount; |
510 | 00406dff | bellard | *zExpPtr = 1 - shiftCount;
|
511 | 00406dff | bellard | |
512 | 00406dff | bellard | } |
513 | 00406dff | bellard | |
514 | 00406dff | bellard | /*
|
515 | 00406dff | bellard | -------------------------------------------------------------------------------
|
516 | 00406dff | bellard | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
|
517 | 00406dff | bellard | extended double-precision floating-point value, returning the result.
|
518 | 00406dff | bellard | -------------------------------------------------------------------------------
|
519 | 00406dff | bellard | */
|
520 | 00406dff | bellard | INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) |
521 | 00406dff | bellard | { |
522 | 00406dff | bellard | floatx80 z; |
523 | 00406dff | bellard | |
524 | 00406dff | bellard | z.low = zSig; |
525 | 00406dff | bellard | z.high = ( ( (bits16) zSign )<<15 ) + zExp;
|
526 | 00406dff | bellard | return z;
|
527 | 00406dff | bellard | |
528 | 00406dff | bellard | } |
529 | 00406dff | bellard | |
530 | 00406dff | bellard | /*
|
531 | 00406dff | bellard | -------------------------------------------------------------------------------
|
532 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
533 | 00406dff | bellard | and extended significand formed by the concatenation of `zSig0' and `zSig1',
|
534 | 00406dff | bellard | and returns the proper extended double-precision floating-point value
|
535 | 00406dff | bellard | corresponding to the abstract input. Ordinarily, the abstract value is
|
536 | 00406dff | bellard | rounded and packed into the extended double-precision format, with the
|
537 | 00406dff | bellard | inexact exception raised if the abstract input cannot be represented
|
538 | 00406dff | bellard | exactly. If the abstract value is too large, however, the overflow and
|
539 | 00406dff | bellard | inexact exceptions are raised and an infinity or maximal finite value is
|
540 | 00406dff | bellard | returned. If the abstract value is too small, the input value is rounded to
|
541 | 00406dff | bellard | a subnormal number, and the underflow and inexact exceptions are raised if
|
542 | 00406dff | bellard | the abstract input cannot be represented exactly as a subnormal extended
|
543 | 00406dff | bellard | double-precision floating-point number.
|
544 | 00406dff | bellard | If `roundingPrecision' is 32 or 64, the result is rounded to the same
|
545 | 00406dff | bellard | number of bits as single or double precision, respectively. Otherwise, the
|
546 | 00406dff | bellard | result is rounded to the full precision of the extended double-precision
|
547 | 00406dff | bellard | format.
|
548 | 00406dff | bellard | The input significand must be normalized or smaller. If the input
|
549 | 00406dff | bellard | significand is not normalized, `zExp' must be 0; in that case, the result
|
550 | 00406dff | bellard | returned is a subnormal number, and it must not require rounding. The
|
551 | 00406dff | bellard | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
|
552 | 00406dff | bellard | Floating-point Arithmetic.
|
553 | 00406dff | bellard | -------------------------------------------------------------------------------
|
554 | 00406dff | bellard | */
|
555 | 00406dff | bellard | static floatx80
|
556 | 00406dff | bellard | roundAndPackFloatx80( |
557 | 00406dff | bellard | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
558 | 00406dff | bellard | ) |
559 | 00406dff | bellard | { |
560 | 00406dff | bellard | int8 roundingMode; |
561 | 00406dff | bellard | flag roundNearestEven, increment, isTiny; |
562 | 00406dff | bellard | int64 roundIncrement, roundMask, roundBits; |
563 | 00406dff | bellard | |
564 | 00406dff | bellard | roundingMode = float_rounding_mode; |
565 | 00406dff | bellard | roundNearestEven = ( roundingMode == float_round_nearest_even ); |
566 | 00406dff | bellard | if ( roundingPrecision == 80 ) goto precision80; |
567 | 00406dff | bellard | if ( roundingPrecision == 64 ) { |
568 | 00406dff | bellard | roundIncrement = LIT64( 0x0000000000000400 );
|
569 | 00406dff | bellard | roundMask = LIT64( 0x00000000000007FF );
|
570 | 00406dff | bellard | } |
571 | 00406dff | bellard | else if ( roundingPrecision == 32 ) { |
572 | 00406dff | bellard | roundIncrement = LIT64( 0x0000008000000000 );
|
573 | 00406dff | bellard | roundMask = LIT64( 0x000000FFFFFFFFFF );
|
574 | 00406dff | bellard | } |
575 | 00406dff | bellard | else {
|
576 | 00406dff | bellard | goto precision80;
|
577 | 00406dff | bellard | } |
578 | 00406dff | bellard | zSig0 |= ( zSig1 != 0 );
|
579 | 00406dff | bellard | if ( ! roundNearestEven ) {
|
580 | 00406dff | bellard | if ( roundingMode == float_round_to_zero ) {
|
581 | 00406dff | bellard | roundIncrement = 0;
|
582 | 00406dff | bellard | } |
583 | 00406dff | bellard | else {
|
584 | 00406dff | bellard | roundIncrement = roundMask; |
585 | 00406dff | bellard | if ( zSign ) {
|
586 | 00406dff | bellard | if ( roundingMode == float_round_up ) roundIncrement = 0; |
587 | 00406dff | bellard | } |
588 | 00406dff | bellard | else {
|
589 | 00406dff | bellard | if ( roundingMode == float_round_down ) roundIncrement = 0; |
590 | 00406dff | bellard | } |
591 | 00406dff | bellard | } |
592 | 00406dff | bellard | } |
593 | 00406dff | bellard | roundBits = zSig0 & roundMask; |
594 | 00406dff | bellard | if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { |
595 | 00406dff | bellard | if ( ( 0x7FFE < zExp ) |
596 | 00406dff | bellard | || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
|
597 | 00406dff | bellard | ) { |
598 | 00406dff | bellard | goto overflow;
|
599 | 00406dff | bellard | } |
600 | 00406dff | bellard | if ( zExp <= 0 ) { |
601 | 00406dff | bellard | isTiny = |
602 | 00406dff | bellard | ( float_detect_tininess == float_tininess_before_rounding ) |
603 | 00406dff | bellard | || ( zExp < 0 )
|
604 | 00406dff | bellard | || ( zSig0 <= zSig0 + roundIncrement ); |
605 | 00406dff | bellard | shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
|
606 | 00406dff | bellard | zExp = 0;
|
607 | 00406dff | bellard | roundBits = zSig0 & roundMask; |
608 | 00406dff | bellard | if ( isTiny && roundBits ) float_raise( float_flag_underflow );
|
609 | 00406dff | bellard | if ( roundBits ) float_exception_flags |= float_flag_inexact;
|
610 | 00406dff | bellard | zSig0 += roundIncrement; |
611 | 00406dff | bellard | if ( (sbits64) zSig0 < 0 ) zExp = 1; |
612 | 00406dff | bellard | roundIncrement = roundMask + 1;
|
613 | 00406dff | bellard | if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { |
614 | 00406dff | bellard | roundMask |= roundIncrement; |
615 | 00406dff | bellard | } |
616 | 00406dff | bellard | zSig0 &= ~ roundMask; |
617 | 00406dff | bellard | return packFloatx80( zSign, zExp, zSig0 );
|
618 | 00406dff | bellard | } |
619 | 00406dff | bellard | } |
620 | 00406dff | bellard | if ( roundBits ) float_exception_flags |= float_flag_inexact;
|
621 | 00406dff | bellard | zSig0 += roundIncrement; |
622 | 00406dff | bellard | if ( zSig0 < roundIncrement ) {
|
623 | 00406dff | bellard | ++zExp; |
624 | 00406dff | bellard | zSig0 = LIT64( 0x8000000000000000 );
|
625 | 00406dff | bellard | } |
626 | 00406dff | bellard | roundIncrement = roundMask + 1;
|
627 | 00406dff | bellard | if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { |
628 | 00406dff | bellard | roundMask |= roundIncrement; |
629 | 00406dff | bellard | } |
630 | 00406dff | bellard | zSig0 &= ~ roundMask; |
631 | 00406dff | bellard | if ( zSig0 == 0 ) zExp = 0; |
632 | 00406dff | bellard | return packFloatx80( zSign, zExp, zSig0 );
|
633 | 00406dff | bellard | precision80:
|
634 | 00406dff | bellard | increment = ( (sbits64) zSig1 < 0 );
|
635 | 00406dff | bellard | if ( ! roundNearestEven ) {
|
636 | 00406dff | bellard | if ( roundingMode == float_round_to_zero ) {
|
637 | 00406dff | bellard | increment = 0;
|
638 | 00406dff | bellard | } |
639 | 00406dff | bellard | else {
|
640 | 00406dff | bellard | if ( zSign ) {
|
641 | 00406dff | bellard | increment = ( roundingMode == float_round_down ) && zSig1; |
642 | 00406dff | bellard | } |
643 | 00406dff | bellard | else {
|
644 | 00406dff | bellard | increment = ( roundingMode == float_round_up ) && zSig1; |
645 | 00406dff | bellard | } |
646 | 00406dff | bellard | } |
647 | 00406dff | bellard | } |
648 | 00406dff | bellard | if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { |
649 | 00406dff | bellard | if ( ( 0x7FFE < zExp ) |
650 | 00406dff | bellard | || ( ( zExp == 0x7FFE )
|
651 | 00406dff | bellard | && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
|
652 | 00406dff | bellard | && increment |
653 | 00406dff | bellard | ) |
654 | 00406dff | bellard | ) { |
655 | 00406dff | bellard | roundMask = 0;
|
656 | 00406dff | bellard | overflow:
|
657 | 00406dff | bellard | float_raise( float_flag_overflow | float_flag_inexact ); |
658 | 00406dff | bellard | if ( ( roundingMode == float_round_to_zero )
|
659 | 00406dff | bellard | || ( zSign && ( roundingMode == float_round_up ) ) |
660 | 00406dff | bellard | || ( ! zSign && ( roundingMode == float_round_down ) ) |
661 | 00406dff | bellard | ) { |
662 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFE, ~ roundMask ); |
663 | 00406dff | bellard | } |
664 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
665 | 00406dff | bellard | } |
666 | 00406dff | bellard | if ( zExp <= 0 ) { |
667 | 00406dff | bellard | isTiny = |
668 | 00406dff | bellard | ( float_detect_tininess == float_tininess_before_rounding ) |
669 | 00406dff | bellard | || ( zExp < 0 )
|
670 | 00406dff | bellard | || ! increment |
671 | 00406dff | bellard | || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
|
672 | 00406dff | bellard | shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
|
673 | 00406dff | bellard | zExp = 0;
|
674 | 00406dff | bellard | if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
|
675 | 00406dff | bellard | if ( zSig1 ) float_exception_flags |= float_flag_inexact;
|
676 | 00406dff | bellard | if ( roundNearestEven ) {
|
677 | 00406dff | bellard | increment = ( (sbits64) zSig1 < 0 );
|
678 | 00406dff | bellard | } |
679 | 00406dff | bellard | else {
|
680 | 00406dff | bellard | if ( zSign ) {
|
681 | 00406dff | bellard | increment = ( roundingMode == float_round_down ) && zSig1; |
682 | 00406dff | bellard | } |
683 | 00406dff | bellard | else {
|
684 | 00406dff | bellard | increment = ( roundingMode == float_round_up ) && zSig1; |
685 | 00406dff | bellard | } |
686 | 00406dff | bellard | } |
687 | 00406dff | bellard | if ( increment ) {
|
688 | 00406dff | bellard | ++zSig0; |
689 | 00406dff | bellard | zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
|
690 | 00406dff | bellard | if ( (sbits64) zSig0 < 0 ) zExp = 1; |
691 | 00406dff | bellard | } |
692 | 00406dff | bellard | return packFloatx80( zSign, zExp, zSig0 );
|
693 | 00406dff | bellard | } |
694 | 00406dff | bellard | } |
695 | 00406dff | bellard | if ( zSig1 ) float_exception_flags |= float_flag_inexact;
|
696 | 00406dff | bellard | if ( increment ) {
|
697 | 00406dff | bellard | ++zSig0; |
698 | 00406dff | bellard | if ( zSig0 == 0 ) { |
699 | 00406dff | bellard | ++zExp; |
700 | 00406dff | bellard | zSig0 = LIT64( 0x8000000000000000 );
|
701 | 00406dff | bellard | } |
702 | 00406dff | bellard | else {
|
703 | 00406dff | bellard | zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
|
704 | 00406dff | bellard | } |
705 | 00406dff | bellard | } |
706 | 00406dff | bellard | else {
|
707 | 00406dff | bellard | if ( zSig0 == 0 ) zExp = 0; |
708 | 00406dff | bellard | } |
709 | 00406dff | bellard | |
710 | 00406dff | bellard | return packFloatx80( zSign, zExp, zSig0 );
|
711 | 00406dff | bellard | } |
712 | 00406dff | bellard | |
713 | 00406dff | bellard | /*
|
714 | 00406dff | bellard | -------------------------------------------------------------------------------
|
715 | 00406dff | bellard | Takes an abstract floating-point value having sign `zSign', exponent
|
716 | 00406dff | bellard | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
|
717 | 00406dff | bellard | and returns the proper extended double-precision floating-point value
|
718 | 00406dff | bellard | corresponding to the abstract input. This routine is just like
|
719 | 00406dff | bellard | `roundAndPackFloatx80' except that the input significand does not have to be
|
720 | 00406dff | bellard | normalized.
|
721 | 00406dff | bellard | -------------------------------------------------------------------------------
|
722 | 00406dff | bellard | */
|
723 | 00406dff | bellard | static floatx80
|
724 | 00406dff | bellard | normalizeRoundAndPackFloatx80( |
725 | 00406dff | bellard | int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 |
726 | 00406dff | bellard | ) |
727 | 00406dff | bellard | { |
728 | 00406dff | bellard | int8 shiftCount; |
729 | 00406dff | bellard | |
730 | 00406dff | bellard | if ( zSig0 == 0 ) { |
731 | 00406dff | bellard | zSig0 = zSig1; |
732 | 00406dff | bellard | zSig1 = 0;
|
733 | 00406dff | bellard | zExp -= 64;
|
734 | 00406dff | bellard | } |
735 | 00406dff | bellard | shiftCount = countLeadingZeros64( zSig0 ); |
736 | 00406dff | bellard | shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); |
737 | 00406dff | bellard | zExp -= shiftCount; |
738 | 00406dff | bellard | return
|
739 | 00406dff | bellard | roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); |
740 | 00406dff | bellard | |
741 | 00406dff | bellard | } |
742 | 00406dff | bellard | |
743 | 00406dff | bellard | #endif
|
744 | 00406dff | bellard | |
745 | 00406dff | bellard | /*
|
746 | 00406dff | bellard | -------------------------------------------------------------------------------
|
747 | 00406dff | bellard | Returns the result of converting the 32-bit two's complement integer `a' to
|
748 | 00406dff | bellard | the single-precision floating-point format. The conversion is performed
|
749 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
750 | 00406dff | bellard | -------------------------------------------------------------------------------
|
751 | 00406dff | bellard | */
|
752 | 00406dff | bellard | float32 int32_to_float32( int32 a ) |
753 | 00406dff | bellard | { |
754 | 00406dff | bellard | flag zSign; |
755 | 00406dff | bellard | |
756 | 00406dff | bellard | if ( a == 0 ) return 0; |
757 | 00406dff | bellard | if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); |
758 | 00406dff | bellard | zSign = ( a < 0 );
|
759 | 00406dff | bellard | return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); |
760 | 00406dff | bellard | |
761 | 00406dff | bellard | } |
762 | 00406dff | bellard | |
763 | 00406dff | bellard | /*
|
764 | 00406dff | bellard | -------------------------------------------------------------------------------
|
765 | 00406dff | bellard | Returns the result of converting the 32-bit two's complement integer `a' to
|
766 | 00406dff | bellard | the double-precision floating-point format. The conversion is performed
|
767 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
768 | 00406dff | bellard | -------------------------------------------------------------------------------
|
769 | 00406dff | bellard | */
|
770 | 00406dff | bellard | float64 int32_to_float64( int32 a ) |
771 | 00406dff | bellard | { |
772 | 00406dff | bellard | flag aSign; |
773 | 00406dff | bellard | uint32 absA; |
774 | 00406dff | bellard | int8 shiftCount; |
775 | 00406dff | bellard | bits64 zSig; |
776 | 00406dff | bellard | |
777 | 00406dff | bellard | if ( a == 0 ) return 0; |
778 | 00406dff | bellard | aSign = ( a < 0 );
|
779 | 00406dff | bellard | absA = aSign ? - a : a; |
780 | 00406dff | bellard | shiftCount = countLeadingZeros32( absA ) + 21;
|
781 | 00406dff | bellard | zSig = absA; |
782 | 00406dff | bellard | return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount ); |
783 | 00406dff | bellard | |
784 | 00406dff | bellard | } |
785 | 00406dff | bellard | |
786 | 00406dff | bellard | #ifdef FLOATX80
|
787 | 00406dff | bellard | |
788 | 00406dff | bellard | /*
|
789 | 00406dff | bellard | -------------------------------------------------------------------------------
|
790 | 00406dff | bellard | Returns the result of converting the 32-bit two's complement integer `a'
|
791 | 00406dff | bellard | to the extended double-precision floating-point format. The conversion
|
792 | 00406dff | bellard | is performed according to the IEC/IEEE Standard for Binary Floating-point
|
793 | 00406dff | bellard | Arithmetic.
|
794 | 00406dff | bellard | -------------------------------------------------------------------------------
|
795 | 00406dff | bellard | */
|
796 | 00406dff | bellard | floatx80 int32_to_floatx80( int32 a ) |
797 | 00406dff | bellard | { |
798 | 00406dff | bellard | flag zSign; |
799 | 00406dff | bellard | uint32 absA; |
800 | 00406dff | bellard | int8 shiftCount; |
801 | 00406dff | bellard | bits64 zSig; |
802 | 00406dff | bellard | |
803 | 00406dff | bellard | if ( a == 0 ) return packFloatx80( 0, 0, 0 ); |
804 | 00406dff | bellard | zSign = ( a < 0 );
|
805 | 00406dff | bellard | absA = zSign ? - a : a; |
806 | 00406dff | bellard | shiftCount = countLeadingZeros32( absA ) + 32;
|
807 | 00406dff | bellard | zSig = absA; |
808 | 00406dff | bellard | return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); |
809 | 00406dff | bellard | |
810 | 00406dff | bellard | } |
811 | 00406dff | bellard | |
812 | 00406dff | bellard | #endif
|
813 | 00406dff | bellard | |
814 | 00406dff | bellard | /*
|
815 | 00406dff | bellard | -------------------------------------------------------------------------------
|
816 | 00406dff | bellard | Returns the result of converting the single-precision floating-point value
|
817 | 00406dff | bellard | `a' to the 32-bit two's complement integer format. The conversion is
|
818 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
819 | 00406dff | bellard | Arithmetic---which means in particular that the conversion is rounded
|
820 | 00406dff | bellard | according to the current rounding mode. If `a' is a NaN, the largest
|
821 | 00406dff | bellard | positive integer is returned. Otherwise, if the conversion overflows, the
|
822 | 00406dff | bellard | largest integer with the same sign as `a' is returned.
|
823 | 00406dff | bellard | -------------------------------------------------------------------------------
|
824 | 00406dff | bellard | */
|
825 | 00406dff | bellard | int32 float32_to_int32( float32 a ) |
826 | 00406dff | bellard | { |
827 | 00406dff | bellard | flag aSign; |
828 | 00406dff | bellard | int16 aExp, shiftCount; |
829 | 00406dff | bellard | bits32 aSig; |
830 | 00406dff | bellard | bits64 zSig; |
831 | 00406dff | bellard | |
832 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
833 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
834 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
835 | 00406dff | bellard | if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; |
836 | 00406dff | bellard | if ( aExp ) aSig |= 0x00800000; |
837 | 00406dff | bellard | shiftCount = 0xAF - aExp;
|
838 | 00406dff | bellard | zSig = aSig; |
839 | 00406dff | bellard | zSig <<= 32;
|
840 | 00406dff | bellard | if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); |
841 | 00406dff | bellard | return roundAndPackInt32( aSign, zSig );
|
842 | 00406dff | bellard | |
843 | 00406dff | bellard | } |
844 | 00406dff | bellard | |
845 | 00406dff | bellard | /*
|
846 | 00406dff | bellard | -------------------------------------------------------------------------------
|
847 | 00406dff | bellard | Returns the result of converting the single-precision floating-point value
|
848 | 00406dff | bellard | `a' to the 32-bit two's complement integer format. The conversion is
|
849 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
850 | 00406dff | bellard | Arithmetic, except that the conversion is always rounded toward zero. If
|
851 | 00406dff | bellard | `a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
852 | 00406dff | bellard | conversion overflows, the largest integer with the same sign as `a' is
|
853 | 00406dff | bellard | returned.
|
854 | 00406dff | bellard | -------------------------------------------------------------------------------
|
855 | 00406dff | bellard | */
|
856 | 00406dff | bellard | int32 float32_to_int32_round_to_zero( float32 a ) |
857 | 00406dff | bellard | { |
858 | 00406dff | bellard | flag aSign; |
859 | 00406dff | bellard | int16 aExp, shiftCount; |
860 | 00406dff | bellard | bits32 aSig; |
861 | 00406dff | bellard | int32 z; |
862 | 00406dff | bellard | |
863 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
864 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
865 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
866 | 00406dff | bellard | shiftCount = aExp - 0x9E;
|
867 | 00406dff | bellard | if ( 0 <= shiftCount ) { |
868 | 00406dff | bellard | if ( a == 0xCF000000 ) return 0x80000000; |
869 | 00406dff | bellard | float_raise( float_flag_invalid ); |
870 | 00406dff | bellard | if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; |
871 | 00406dff | bellard | return 0x80000000; |
872 | 00406dff | bellard | } |
873 | 00406dff | bellard | else if ( aExp <= 0x7E ) { |
874 | 00406dff | bellard | if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
|
875 | 00406dff | bellard | return 0; |
876 | 00406dff | bellard | } |
877 | 00406dff | bellard | aSig = ( aSig | 0x00800000 )<<8; |
878 | 00406dff | bellard | z = aSig>>( - shiftCount ); |
879 | 00406dff | bellard | if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { |
880 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
881 | 00406dff | bellard | } |
882 | 00406dff | bellard | return aSign ? - z : z;
|
883 | 00406dff | bellard | |
884 | 00406dff | bellard | } |
885 | 00406dff | bellard | |
886 | 00406dff | bellard | /*
|
887 | 00406dff | bellard | -------------------------------------------------------------------------------
|
888 | 00406dff | bellard | Returns the result of converting the single-precision floating-point value
|
889 | 00406dff | bellard | `a' to the double-precision floating-point format. The conversion is
|
890 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
891 | 00406dff | bellard | Arithmetic.
|
892 | 00406dff | bellard | -------------------------------------------------------------------------------
|
893 | 00406dff | bellard | */
|
894 | 00406dff | bellard | float64 float32_to_float64( float32 a ) |
895 | 00406dff | bellard | { |
896 | 00406dff | bellard | flag aSign; |
897 | 00406dff | bellard | int16 aExp; |
898 | 00406dff | bellard | bits32 aSig; |
899 | 00406dff | bellard | |
900 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
901 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
902 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
903 | 00406dff | bellard | if ( aExp == 0xFF ) { |
904 | 00406dff | bellard | if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); |
905 | 00406dff | bellard | return packFloat64( aSign, 0x7FF, 0 ); |
906 | 00406dff | bellard | } |
907 | 00406dff | bellard | if ( aExp == 0 ) { |
908 | 00406dff | bellard | if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); |
909 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
910 | 00406dff | bellard | --aExp; |
911 | 00406dff | bellard | } |
912 | 00406dff | bellard | return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); |
913 | 00406dff | bellard | |
914 | 00406dff | bellard | } |
915 | 00406dff | bellard | |
916 | 00406dff | bellard | #ifdef FLOATX80
|
917 | 00406dff | bellard | |
918 | 00406dff | bellard | /*
|
919 | 00406dff | bellard | -------------------------------------------------------------------------------
|
920 | 00406dff | bellard | Returns the result of converting the single-precision floating-point value
|
921 | 00406dff | bellard | `a' to the extended double-precision floating-point format. The conversion
|
922 | 00406dff | bellard | is performed according to the IEC/IEEE Standard for Binary Floating-point
|
923 | 00406dff | bellard | Arithmetic.
|
924 | 00406dff | bellard | -------------------------------------------------------------------------------
|
925 | 00406dff | bellard | */
|
926 | 00406dff | bellard | floatx80 float32_to_floatx80( float32 a ) |
927 | 00406dff | bellard | { |
928 | 00406dff | bellard | flag aSign; |
929 | 00406dff | bellard | int16 aExp; |
930 | 00406dff | bellard | bits32 aSig; |
931 | 00406dff | bellard | |
932 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
933 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
934 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
935 | 00406dff | bellard | if ( aExp == 0xFF ) { |
936 | 00406dff | bellard | if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); |
937 | 00406dff | bellard | return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
938 | 00406dff | bellard | } |
939 | 00406dff | bellard | if ( aExp == 0 ) { |
940 | 00406dff | bellard | if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); |
941 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
942 | 00406dff | bellard | } |
943 | 00406dff | bellard | aSig |= 0x00800000;
|
944 | 00406dff | bellard | return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); |
945 | 00406dff | bellard | |
946 | 00406dff | bellard | } |
947 | 00406dff | bellard | |
948 | 00406dff | bellard | #endif
|
949 | 00406dff | bellard | |
950 | 00406dff | bellard | /*
|
951 | 00406dff | bellard | -------------------------------------------------------------------------------
|
952 | 00406dff | bellard | Rounds the single-precision floating-point value `a' to an integer, and
|
953 | 00406dff | bellard | returns the result as a single-precision floating-point value. The
|
954 | 00406dff | bellard | operation is performed according to the IEC/IEEE Standard for Binary
|
955 | 00406dff | bellard | Floating-point Arithmetic.
|
956 | 00406dff | bellard | -------------------------------------------------------------------------------
|
957 | 00406dff | bellard | */
|
958 | 00406dff | bellard | float32 float32_round_to_int( float32 a ) |
959 | 00406dff | bellard | { |
960 | 00406dff | bellard | flag aSign; |
961 | 00406dff | bellard | int16 aExp; |
962 | 00406dff | bellard | bits32 lastBitMask, roundBitsMask; |
963 | 00406dff | bellard | int8 roundingMode; |
964 | 00406dff | bellard | float32 z; |
965 | 00406dff | bellard | |
966 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
967 | 00406dff | bellard | if ( 0x96 <= aExp ) { |
968 | 00406dff | bellard | if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { |
969 | 00406dff | bellard | return propagateFloat32NaN( a, a );
|
970 | 00406dff | bellard | } |
971 | 00406dff | bellard | return a;
|
972 | 00406dff | bellard | } |
973 | 00406dff | bellard | if ( aExp <= 0x7E ) { |
974 | 00406dff | bellard | if ( (bits32) ( a<<1 ) == 0 ) return a; |
975 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
976 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
977 | 00406dff | bellard | switch ( float_rounding_mode ) {
|
978 | 00406dff | bellard | case float_round_nearest_even:
|
979 | 00406dff | bellard | if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { |
980 | 00406dff | bellard | return packFloat32( aSign, 0x7F, 0 ); |
981 | 00406dff | bellard | } |
982 | 00406dff | bellard | break;
|
983 | 00406dff | bellard | case float_round_down:
|
984 | 00406dff | bellard | return aSign ? 0xBF800000 : 0; |
985 | 00406dff | bellard | case float_round_up:
|
986 | 00406dff | bellard | return aSign ? 0x80000000 : 0x3F800000; |
987 | 00406dff | bellard | } |
988 | 00406dff | bellard | return packFloat32( aSign, 0, 0 ); |
989 | 00406dff | bellard | } |
990 | 00406dff | bellard | lastBitMask = 1;
|
991 | 00406dff | bellard | lastBitMask <<= 0x96 - aExp;
|
992 | 00406dff | bellard | roundBitsMask = lastBitMask - 1;
|
993 | 00406dff | bellard | z = a; |
994 | 00406dff | bellard | roundingMode = float_rounding_mode; |
995 | 00406dff | bellard | if ( roundingMode == float_round_nearest_even ) {
|
996 | 00406dff | bellard | z += lastBitMask>>1;
|
997 | 00406dff | bellard | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
998 | 00406dff | bellard | } |
999 | 00406dff | bellard | else if ( roundingMode != float_round_to_zero ) { |
1000 | 00406dff | bellard | if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
|
1001 | 00406dff | bellard | z += roundBitsMask; |
1002 | 00406dff | bellard | } |
1003 | 00406dff | bellard | } |
1004 | 00406dff | bellard | z &= ~ roundBitsMask; |
1005 | 00406dff | bellard | if ( z != a ) float_exception_flags |= float_flag_inexact;
|
1006 | 00406dff | bellard | return z;
|
1007 | 00406dff | bellard | |
1008 | 00406dff | bellard | } |
1009 | 00406dff | bellard | |
1010 | 00406dff | bellard | /*
|
1011 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1012 | 00406dff | bellard | Returns the result of adding the absolute values of the single-precision
|
1013 | 00406dff | bellard | floating-point values `a' and `b'. If `zSign' is true, the sum is negated
|
1014 | 00406dff | bellard | before being returned. `zSign' is ignored if the result is a NaN. The
|
1015 | 00406dff | bellard | addition is performed according to the IEC/IEEE Standard for Binary
|
1016 | 00406dff | bellard | Floating-point Arithmetic.
|
1017 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1018 | 00406dff | bellard | */
|
1019 | 00406dff | bellard | static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
|
1020 | 00406dff | bellard | { |
1021 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1022 | 00406dff | bellard | bits32 aSig, bSig, zSig; |
1023 | 00406dff | bellard | int16 expDiff; |
1024 | 00406dff | bellard | |
1025 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1026 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1027 | 00406dff | bellard | bSig = extractFloat32Frac( b ); |
1028 | 00406dff | bellard | bExp = extractFloat32Exp( b ); |
1029 | 00406dff | bellard | expDiff = aExp - bExp; |
1030 | 00406dff | bellard | aSig <<= 6;
|
1031 | 00406dff | bellard | bSig <<= 6;
|
1032 | 00406dff | bellard | if ( 0 < expDiff ) { |
1033 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1034 | 00406dff | bellard | if ( aSig ) return propagateFloat32NaN( a, b ); |
1035 | 00406dff | bellard | return a;
|
1036 | 00406dff | bellard | } |
1037 | 00406dff | bellard | if ( bExp == 0 ) { |
1038 | 00406dff | bellard | --expDiff; |
1039 | 00406dff | bellard | } |
1040 | 00406dff | bellard | else {
|
1041 | 00406dff | bellard | bSig |= 0x20000000;
|
1042 | 00406dff | bellard | } |
1043 | 00406dff | bellard | shift32RightJamming( bSig, expDiff, &bSig ); |
1044 | 00406dff | bellard | zExp = aExp; |
1045 | 00406dff | bellard | } |
1046 | 00406dff | bellard | else if ( expDiff < 0 ) { |
1047 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1048 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1049 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ); |
1050 | 00406dff | bellard | } |
1051 | 00406dff | bellard | if ( aExp == 0 ) { |
1052 | 00406dff | bellard | ++expDiff; |
1053 | 00406dff | bellard | } |
1054 | 00406dff | bellard | else {
|
1055 | 00406dff | bellard | aSig |= 0x20000000;
|
1056 | 00406dff | bellard | } |
1057 | 00406dff | bellard | shift32RightJamming( aSig, - expDiff, &aSig ); |
1058 | 00406dff | bellard | zExp = bExp; |
1059 | 00406dff | bellard | } |
1060 | 00406dff | bellard | else {
|
1061 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1062 | 00406dff | bellard | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); |
1063 | 00406dff | bellard | return a;
|
1064 | 00406dff | bellard | } |
1065 | 00406dff | bellard | if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); |
1066 | 00406dff | bellard | zSig = 0x40000000 + aSig + bSig;
|
1067 | 00406dff | bellard | zExp = aExp; |
1068 | 00406dff | bellard | goto roundAndPack;
|
1069 | 00406dff | bellard | } |
1070 | 00406dff | bellard | aSig |= 0x20000000;
|
1071 | 00406dff | bellard | zSig = ( aSig + bSig )<<1;
|
1072 | 00406dff | bellard | --zExp; |
1073 | 00406dff | bellard | if ( (sbits32) zSig < 0 ) { |
1074 | 00406dff | bellard | zSig = aSig + bSig; |
1075 | 00406dff | bellard | ++zExp; |
1076 | 00406dff | bellard | } |
1077 | 00406dff | bellard | roundAndPack:
|
1078 | 00406dff | bellard | return roundAndPackFloat32( zSign, zExp, zSig );
|
1079 | 00406dff | bellard | |
1080 | 00406dff | bellard | } |
1081 | 00406dff | bellard | |
1082 | 00406dff | bellard | /*
|
1083 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1084 | 00406dff | bellard | Returns the result of subtracting the absolute values of the single-
|
1085 | 00406dff | bellard | precision floating-point values `a' and `b'. If `zSign' is true, the
|
1086 | 00406dff | bellard | difference is negated before being returned. `zSign' is ignored if the
|
1087 | 00406dff | bellard | result is a NaN. The subtraction is performed according to the IEC/IEEE
|
1088 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
1089 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1090 | 00406dff | bellard | */
|
1091 | 00406dff | bellard | static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
|
1092 | 00406dff | bellard | { |
1093 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1094 | 00406dff | bellard | bits32 aSig, bSig, zSig; |
1095 | 00406dff | bellard | int16 expDiff; |
1096 | 00406dff | bellard | |
1097 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1098 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1099 | 00406dff | bellard | bSig = extractFloat32Frac( b ); |
1100 | 00406dff | bellard | bExp = extractFloat32Exp( b ); |
1101 | 00406dff | bellard | expDiff = aExp - bExp; |
1102 | 00406dff | bellard | aSig <<= 7;
|
1103 | 00406dff | bellard | bSig <<= 7;
|
1104 | 00406dff | bellard | if ( 0 < expDiff ) goto aExpBigger; |
1105 | 00406dff | bellard | if ( expDiff < 0 ) goto bExpBigger; |
1106 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1107 | 00406dff | bellard | if ( aSig | bSig ) return propagateFloat32NaN( a, b ); |
1108 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1109 | 00406dff | bellard | return float32_default_nan;
|
1110 | 00406dff | bellard | } |
1111 | 00406dff | bellard | if ( aExp == 0 ) { |
1112 | 00406dff | bellard | aExp = 1;
|
1113 | 00406dff | bellard | bExp = 1;
|
1114 | 00406dff | bellard | } |
1115 | 00406dff | bellard | if ( bSig < aSig ) goto aBigger; |
1116 | 00406dff | bellard | if ( aSig < bSig ) goto bBigger; |
1117 | 00406dff | bellard | return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); |
1118 | 00406dff | bellard | bExpBigger:
|
1119 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1120 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1121 | 00406dff | bellard | return packFloat32( zSign ^ 1, 0xFF, 0 ); |
1122 | 00406dff | bellard | } |
1123 | 00406dff | bellard | if ( aExp == 0 ) { |
1124 | 00406dff | bellard | ++expDiff; |
1125 | 00406dff | bellard | } |
1126 | 00406dff | bellard | else {
|
1127 | 00406dff | bellard | aSig |= 0x40000000;
|
1128 | 00406dff | bellard | } |
1129 | 00406dff | bellard | shift32RightJamming( aSig, - expDiff, &aSig ); |
1130 | 00406dff | bellard | bSig |= 0x40000000;
|
1131 | 00406dff | bellard | bBigger:
|
1132 | 00406dff | bellard | zSig = bSig - aSig; |
1133 | 00406dff | bellard | zExp = bExp; |
1134 | 00406dff | bellard | zSign ^= 1;
|
1135 | 00406dff | bellard | goto normalizeRoundAndPack;
|
1136 | 00406dff | bellard | aExpBigger:
|
1137 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1138 | 00406dff | bellard | if ( aSig ) return propagateFloat32NaN( a, b ); |
1139 | 00406dff | bellard | return a;
|
1140 | 00406dff | bellard | } |
1141 | 00406dff | bellard | if ( bExp == 0 ) { |
1142 | 00406dff | bellard | --expDiff; |
1143 | 00406dff | bellard | } |
1144 | 00406dff | bellard | else {
|
1145 | 00406dff | bellard | bSig |= 0x40000000;
|
1146 | 00406dff | bellard | } |
1147 | 00406dff | bellard | shift32RightJamming( bSig, expDiff, &bSig ); |
1148 | 00406dff | bellard | aSig |= 0x40000000;
|
1149 | 00406dff | bellard | aBigger:
|
1150 | 00406dff | bellard | zSig = aSig - bSig; |
1151 | 00406dff | bellard | zExp = aExp; |
1152 | 00406dff | bellard | normalizeRoundAndPack:
|
1153 | 00406dff | bellard | --zExp; |
1154 | 00406dff | bellard | return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
|
1155 | 00406dff | bellard | |
1156 | 00406dff | bellard | } |
1157 | 00406dff | bellard | |
1158 | 00406dff | bellard | /*
|
1159 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1160 | 00406dff | bellard | Returns the result of adding the single-precision floating-point values `a'
|
1161 | 00406dff | bellard | and `b'. The operation is performed according to the IEC/IEEE Standard for
|
1162 | 00406dff | bellard | Binary Floating-point Arithmetic.
|
1163 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1164 | 00406dff | bellard | */
|
1165 | 00406dff | bellard | float32 float32_add( float32 a, float32 b ) |
1166 | 00406dff | bellard | { |
1167 | 00406dff | bellard | flag aSign, bSign; |
1168 | 00406dff | bellard | |
1169 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1170 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1171 | 00406dff | bellard | if ( aSign == bSign ) {
|
1172 | 00406dff | bellard | return addFloat32Sigs( a, b, aSign );
|
1173 | 00406dff | bellard | } |
1174 | 00406dff | bellard | else {
|
1175 | 00406dff | bellard | return subFloat32Sigs( a, b, aSign );
|
1176 | 00406dff | bellard | } |
1177 | 00406dff | bellard | |
1178 | 00406dff | bellard | } |
1179 | 00406dff | bellard | |
1180 | 00406dff | bellard | /*
|
1181 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1182 | 00406dff | bellard | Returns the result of subtracting the single-precision floating-point values
|
1183 | 00406dff | bellard | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
1184 | 00406dff | bellard | for Binary Floating-point Arithmetic.
|
1185 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1186 | 00406dff | bellard | */
|
1187 | 00406dff | bellard | float32 float32_sub( float32 a, float32 b ) |
1188 | 00406dff | bellard | { |
1189 | 00406dff | bellard | flag aSign, bSign; |
1190 | 00406dff | bellard | |
1191 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1192 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1193 | 00406dff | bellard | if ( aSign == bSign ) {
|
1194 | 00406dff | bellard | return subFloat32Sigs( a, b, aSign );
|
1195 | 00406dff | bellard | } |
1196 | 00406dff | bellard | else {
|
1197 | 00406dff | bellard | return addFloat32Sigs( a, b, aSign );
|
1198 | 00406dff | bellard | } |
1199 | 00406dff | bellard | |
1200 | 00406dff | bellard | } |
1201 | 00406dff | bellard | |
1202 | 00406dff | bellard | /*
|
1203 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1204 | 00406dff | bellard | Returns the result of multiplying the single-precision floating-point values
|
1205 | 00406dff | bellard | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
1206 | 00406dff | bellard | for Binary Floating-point Arithmetic.
|
1207 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1208 | 00406dff | bellard | */
|
1209 | 00406dff | bellard | float32 float32_mul( float32 a, float32 b ) |
1210 | 00406dff | bellard | { |
1211 | 00406dff | bellard | flag aSign, bSign, zSign; |
1212 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1213 | 00406dff | bellard | bits32 aSig, bSig; |
1214 | 00406dff | bellard | bits64 zSig64; |
1215 | 00406dff | bellard | bits32 zSig; |
1216 | 00406dff | bellard | |
1217 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1218 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1219 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1220 | 00406dff | bellard | bSig = extractFloat32Frac( b ); |
1221 | 00406dff | bellard | bExp = extractFloat32Exp( b ); |
1222 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1223 | 00406dff | bellard | zSign = aSign ^ bSign; |
1224 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1225 | 00406dff | bellard | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { |
1226 | 00406dff | bellard | return propagateFloat32NaN( a, b );
|
1227 | 00406dff | bellard | } |
1228 | 00406dff | bellard | if ( ( bExp | bSig ) == 0 ) { |
1229 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1230 | 00406dff | bellard | return float32_default_nan;
|
1231 | 00406dff | bellard | } |
1232 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ); |
1233 | 00406dff | bellard | } |
1234 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1235 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1236 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
1237 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1238 | 00406dff | bellard | return float32_default_nan;
|
1239 | 00406dff | bellard | } |
1240 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ); |
1241 | 00406dff | bellard | } |
1242 | 00406dff | bellard | if ( aExp == 0 ) { |
1243 | 00406dff | bellard | if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); |
1244 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
1245 | 00406dff | bellard | } |
1246 | 00406dff | bellard | if ( bExp == 0 ) { |
1247 | 00406dff | bellard | if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); |
1248 | 00406dff | bellard | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
1249 | 00406dff | bellard | } |
1250 | 00406dff | bellard | zExp = aExp + bExp - 0x7F;
|
1251 | 00406dff | bellard | aSig = ( aSig | 0x00800000 )<<7; |
1252 | 00406dff | bellard | bSig = ( bSig | 0x00800000 )<<8; |
1253 | 00406dff | bellard | shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
|
1254 | 00406dff | bellard | zSig = zSig64; |
1255 | 00406dff | bellard | if ( 0 <= (sbits32) ( zSig<<1 ) ) { |
1256 | 00406dff | bellard | zSig <<= 1;
|
1257 | 00406dff | bellard | --zExp; |
1258 | 00406dff | bellard | } |
1259 | 00406dff | bellard | return roundAndPackFloat32( zSign, zExp, zSig );
|
1260 | 00406dff | bellard | |
1261 | 00406dff | bellard | } |
1262 | 00406dff | bellard | |
1263 | 00406dff | bellard | /*
|
1264 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1265 | 00406dff | bellard | Returns the result of dividing the single-precision floating-point value `a'
|
1266 | 00406dff | bellard | by the corresponding value `b'. The operation is performed according to the
|
1267 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1268 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1269 | 00406dff | bellard | */
|
1270 | 00406dff | bellard | float32 float32_div( float32 a, float32 b ) |
1271 | 00406dff | bellard | { |
1272 | 00406dff | bellard | flag aSign, bSign, zSign; |
1273 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1274 | 00406dff | bellard | bits32 aSig, bSig, zSig; |
1275 | 00406dff | bellard | |
1276 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1277 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1278 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1279 | 00406dff | bellard | bSig = extractFloat32Frac( b ); |
1280 | 00406dff | bellard | bExp = extractFloat32Exp( b ); |
1281 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1282 | 00406dff | bellard | zSign = aSign ^ bSign; |
1283 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1284 | 00406dff | bellard | if ( aSig ) return propagateFloat32NaN( a, b ); |
1285 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1286 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1287 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1288 | 00406dff | bellard | return float32_default_nan;
|
1289 | 00406dff | bellard | } |
1290 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ); |
1291 | 00406dff | bellard | } |
1292 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1293 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1294 | 00406dff | bellard | return packFloat32( zSign, 0, 0 ); |
1295 | 00406dff | bellard | } |
1296 | 00406dff | bellard | if ( bExp == 0 ) { |
1297 | 00406dff | bellard | if ( bSig == 0 ) { |
1298 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
1299 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1300 | 00406dff | bellard | return float32_default_nan;
|
1301 | 00406dff | bellard | } |
1302 | 00406dff | bellard | float_raise( float_flag_divbyzero ); |
1303 | 00406dff | bellard | return packFloat32( zSign, 0xFF, 0 ); |
1304 | 00406dff | bellard | } |
1305 | 00406dff | bellard | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
1306 | 00406dff | bellard | } |
1307 | 00406dff | bellard | if ( aExp == 0 ) { |
1308 | 00406dff | bellard | if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); |
1309 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
1310 | 00406dff | bellard | } |
1311 | 00406dff | bellard | zExp = aExp - bExp + 0x7D;
|
1312 | 00406dff | bellard | aSig = ( aSig | 0x00800000 )<<7; |
1313 | 00406dff | bellard | bSig = ( bSig | 0x00800000 )<<8; |
1314 | 00406dff | bellard | if ( bSig <= ( aSig + aSig ) ) {
|
1315 | 00406dff | bellard | aSig >>= 1;
|
1316 | 00406dff | bellard | ++zExp; |
1317 | 00406dff | bellard | } |
1318 | 00406dff | bellard | zSig = ( ( (bits64) aSig )<<32 ) / bSig;
|
1319 | 00406dff | bellard | if ( ( zSig & 0x3F ) == 0 ) { |
1320 | 00406dff | bellard | zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
|
1321 | 00406dff | bellard | } |
1322 | 00406dff | bellard | return roundAndPackFloat32( zSign, zExp, zSig );
|
1323 | 00406dff | bellard | |
1324 | 00406dff | bellard | } |
1325 | 00406dff | bellard | |
1326 | 00406dff | bellard | /*
|
1327 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1328 | 00406dff | bellard | Returns the remainder of the single-precision floating-point value `a'
|
1329 | 00406dff | bellard | with respect to the corresponding value `b'. The operation is performed
|
1330 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1331 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1332 | 00406dff | bellard | */
|
1333 | 00406dff | bellard | float32 float32_rem( float32 a, float32 b ) |
1334 | 00406dff | bellard | { |
1335 | 00406dff | bellard | flag aSign, bSign, zSign; |
1336 | 00406dff | bellard | int16 aExp, bExp, expDiff; |
1337 | 00406dff | bellard | bits32 aSig, bSig; |
1338 | 00406dff | bellard | bits32 q; |
1339 | 00406dff | bellard | bits64 aSig64, bSig64, q64; |
1340 | 00406dff | bellard | bits32 alternateASig; |
1341 | 00406dff | bellard | sbits32 sigMean; |
1342 | 00406dff | bellard | |
1343 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1344 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1345 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1346 | 00406dff | bellard | bSig = extractFloat32Frac( b ); |
1347 | 00406dff | bellard | bExp = extractFloat32Exp( b ); |
1348 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1349 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1350 | 00406dff | bellard | if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { |
1351 | 00406dff | bellard | return propagateFloat32NaN( a, b );
|
1352 | 00406dff | bellard | } |
1353 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1354 | 00406dff | bellard | return float32_default_nan;
|
1355 | 00406dff | bellard | } |
1356 | 00406dff | bellard | if ( bExp == 0xFF ) { |
1357 | 00406dff | bellard | if ( bSig ) return propagateFloat32NaN( a, b ); |
1358 | 00406dff | bellard | return a;
|
1359 | 00406dff | bellard | } |
1360 | 00406dff | bellard | if ( bExp == 0 ) { |
1361 | 00406dff | bellard | if ( bSig == 0 ) { |
1362 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1363 | 00406dff | bellard | return float32_default_nan;
|
1364 | 00406dff | bellard | } |
1365 | 00406dff | bellard | normalizeFloat32Subnormal( bSig, &bExp, &bSig ); |
1366 | 00406dff | bellard | } |
1367 | 00406dff | bellard | if ( aExp == 0 ) { |
1368 | 00406dff | bellard | if ( aSig == 0 ) return a; |
1369 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
1370 | 00406dff | bellard | } |
1371 | 00406dff | bellard | expDiff = aExp - bExp; |
1372 | 00406dff | bellard | aSig |= 0x00800000;
|
1373 | 00406dff | bellard | bSig |= 0x00800000;
|
1374 | 00406dff | bellard | if ( expDiff < 32 ) { |
1375 | 00406dff | bellard | aSig <<= 8;
|
1376 | 00406dff | bellard | bSig <<= 8;
|
1377 | 00406dff | bellard | if ( expDiff < 0 ) { |
1378 | 00406dff | bellard | if ( expDiff < -1 ) return a; |
1379 | 00406dff | bellard | aSig >>= 1;
|
1380 | 00406dff | bellard | } |
1381 | 00406dff | bellard | q = ( bSig <= aSig ); |
1382 | 00406dff | bellard | if ( q ) aSig -= bSig;
|
1383 | 00406dff | bellard | if ( 0 < expDiff ) { |
1384 | 00406dff | bellard | q = ( ( (bits64) aSig )<<32 ) / bSig;
|
1385 | 00406dff | bellard | q >>= 32 - expDiff;
|
1386 | 00406dff | bellard | bSig >>= 2;
|
1387 | 00406dff | bellard | aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; |
1388 | 00406dff | bellard | } |
1389 | 00406dff | bellard | else {
|
1390 | 00406dff | bellard | aSig >>= 2;
|
1391 | 00406dff | bellard | bSig >>= 2;
|
1392 | 00406dff | bellard | } |
1393 | 00406dff | bellard | } |
1394 | 00406dff | bellard | else {
|
1395 | 00406dff | bellard | if ( bSig <= aSig ) aSig -= bSig;
|
1396 | 00406dff | bellard | aSig64 = ( (bits64) aSig )<<40;
|
1397 | 00406dff | bellard | bSig64 = ( (bits64) bSig )<<40;
|
1398 | 00406dff | bellard | expDiff -= 64;
|
1399 | 00406dff | bellard | while ( 0 < expDiff ) { |
1400 | 00406dff | bellard | q64 = estimateDiv128To64( aSig64, 0, bSig64 );
|
1401 | 00406dff | bellard | q64 = ( 2 < q64 ) ? q64 - 2 : 0; |
1402 | 00406dff | bellard | aSig64 = - ( ( bSig * q64 )<<38 );
|
1403 | 00406dff | bellard | expDiff -= 62;
|
1404 | 00406dff | bellard | } |
1405 | 00406dff | bellard | expDiff += 64;
|
1406 | 00406dff | bellard | q64 = estimateDiv128To64( aSig64, 0, bSig64 );
|
1407 | 00406dff | bellard | q64 = ( 2 < q64 ) ? q64 - 2 : 0; |
1408 | 00406dff | bellard | q = q64>>( 64 - expDiff );
|
1409 | 00406dff | bellard | bSig <<= 6;
|
1410 | 00406dff | bellard | aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; |
1411 | 00406dff | bellard | } |
1412 | 00406dff | bellard | do {
|
1413 | 00406dff | bellard | alternateASig = aSig; |
1414 | 00406dff | bellard | ++q; |
1415 | 00406dff | bellard | aSig -= bSig; |
1416 | 00406dff | bellard | } while ( 0 <= (sbits32) aSig ); |
1417 | 00406dff | bellard | sigMean = aSig + alternateASig; |
1418 | 00406dff | bellard | if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { |
1419 | 00406dff | bellard | aSig = alternateASig; |
1420 | 00406dff | bellard | } |
1421 | 00406dff | bellard | zSign = ( (sbits32) aSig < 0 );
|
1422 | 00406dff | bellard | if ( zSign ) aSig = - aSig;
|
1423 | 00406dff | bellard | return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
|
1424 | 00406dff | bellard | |
1425 | 00406dff | bellard | } |
1426 | 00406dff | bellard | |
1427 | 00406dff | bellard | /*
|
1428 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1429 | 00406dff | bellard | Returns the square root of the single-precision floating-point value `a'.
|
1430 | 00406dff | bellard | The operation is performed according to the IEC/IEEE Standard for Binary
|
1431 | 00406dff | bellard | Floating-point Arithmetic.
|
1432 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1433 | 00406dff | bellard | */
|
1434 | 00406dff | bellard | float32 float32_sqrt( float32 a ) |
1435 | 00406dff | bellard | { |
1436 | 00406dff | bellard | flag aSign; |
1437 | 00406dff | bellard | int16 aExp, zExp; |
1438 | 00406dff | bellard | bits32 aSig, zSig; |
1439 | 00406dff | bellard | bits64 rem, term; |
1440 | 00406dff | bellard | |
1441 | 00406dff | bellard | aSig = extractFloat32Frac( a ); |
1442 | 00406dff | bellard | aExp = extractFloat32Exp( a ); |
1443 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1444 | 00406dff | bellard | if ( aExp == 0xFF ) { |
1445 | 00406dff | bellard | if ( aSig ) return propagateFloat32NaN( a, 0 ); |
1446 | 00406dff | bellard | if ( ! aSign ) return a; |
1447 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1448 | 00406dff | bellard | return float32_default_nan;
|
1449 | 00406dff | bellard | } |
1450 | 00406dff | bellard | if ( aSign ) {
|
1451 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) return a; |
1452 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1453 | 00406dff | bellard | return float32_default_nan;
|
1454 | 00406dff | bellard | } |
1455 | 00406dff | bellard | if ( aExp == 0 ) { |
1456 | 00406dff | bellard | if ( aSig == 0 ) return 0; |
1457 | 00406dff | bellard | normalizeFloat32Subnormal( aSig, &aExp, &aSig ); |
1458 | 00406dff | bellard | } |
1459 | 00406dff | bellard | zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; |
1460 | 00406dff | bellard | aSig = ( aSig | 0x00800000 )<<8; |
1461 | 00406dff | bellard | zSig = estimateSqrt32( aExp, aSig ) + 2;
|
1462 | 00406dff | bellard | if ( ( zSig & 0x7F ) <= 5 ) { |
1463 | 00406dff | bellard | if ( zSig < 2 ) { |
1464 | 00406dff | bellard | zSig = 0xFFFFFFFF;
|
1465 | 00406dff | bellard | } |
1466 | 00406dff | bellard | else {
|
1467 | 00406dff | bellard | aSig >>= aExp & 1;
|
1468 | 00406dff | bellard | term = ( (bits64) zSig ) * zSig; |
1469 | 00406dff | bellard | rem = ( ( (bits64) aSig )<<32 ) - term;
|
1470 | 00406dff | bellard | while ( (sbits64) rem < 0 ) { |
1471 | 00406dff | bellard | --zSig; |
1472 | 00406dff | bellard | rem += ( ( (bits64) zSig )<<1 ) | 1; |
1473 | 00406dff | bellard | } |
1474 | 00406dff | bellard | zSig |= ( rem != 0 );
|
1475 | 00406dff | bellard | } |
1476 | 00406dff | bellard | } |
1477 | 00406dff | bellard | shift32RightJamming( zSig, 1, &zSig );
|
1478 | 00406dff | bellard | return roundAndPackFloat32( 0, zExp, zSig ); |
1479 | 00406dff | bellard | |
1480 | 00406dff | bellard | } |
1481 | 00406dff | bellard | |
1482 | 00406dff | bellard | /*
|
1483 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1484 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is equal to the
|
1485 | 00406dff | bellard | corresponding value `b', and 0 otherwise. The comparison is performed
|
1486 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1487 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1488 | 00406dff | bellard | */
|
1489 | 00406dff | bellard | flag float32_eq( float32 a, float32 b ) |
1490 | 00406dff | bellard | { |
1491 | 00406dff | bellard | |
1492 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1493 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1494 | 00406dff | bellard | ) { |
1495 | 00406dff | bellard | if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
|
1496 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1497 | 00406dff | bellard | } |
1498 | 00406dff | bellard | return 0; |
1499 | 00406dff | bellard | } |
1500 | 00406dff | bellard | return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); |
1501 | 00406dff | bellard | |
1502 | 00406dff | bellard | } |
1503 | 00406dff | bellard | |
1504 | 00406dff | bellard | /*
|
1505 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1506 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is less than or
|
1507 | 00406dff | bellard | equal to the corresponding value `b', and 0 otherwise. The comparison is
|
1508 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
1509 | 00406dff | bellard | Arithmetic.
|
1510 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1511 | 00406dff | bellard | */
|
1512 | 00406dff | bellard | flag float32_le( float32 a, float32 b ) |
1513 | 00406dff | bellard | { |
1514 | 00406dff | bellard | flag aSign, bSign; |
1515 | 00406dff | bellard | |
1516 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1517 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1518 | 00406dff | bellard | ) { |
1519 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1520 | 00406dff | bellard | return 0; |
1521 | 00406dff | bellard | } |
1522 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1523 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1524 | 00406dff | bellard | if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); |
1525 | 00406dff | bellard | return ( a == b ) || ( aSign ^ ( a < b ) );
|
1526 | 00406dff | bellard | |
1527 | 00406dff | bellard | } |
1528 | 00406dff | bellard | |
1529 | 00406dff | bellard | /*
|
1530 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1531 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is less than
|
1532 | 00406dff | bellard | the corresponding value `b', and 0 otherwise. The comparison is performed
|
1533 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1534 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1535 | 00406dff | bellard | */
|
1536 | 00406dff | bellard | flag float32_lt( float32 a, float32 b ) |
1537 | 00406dff | bellard | { |
1538 | 00406dff | bellard | flag aSign, bSign; |
1539 | 00406dff | bellard | |
1540 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1541 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1542 | 00406dff | bellard | ) { |
1543 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1544 | 00406dff | bellard | return 0; |
1545 | 00406dff | bellard | } |
1546 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1547 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1548 | 00406dff | bellard | if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); |
1549 | 00406dff | bellard | return ( a != b ) && ( aSign ^ ( a < b ) );
|
1550 | 00406dff | bellard | |
1551 | 00406dff | bellard | } |
1552 | 00406dff | bellard | |
1553 | 00406dff | bellard | /*
|
1554 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1555 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is equal to the
|
1556 | 00406dff | bellard | corresponding value `b', and 0 otherwise. The invalid exception is raised
|
1557 | 00406dff | bellard | if either operand is a NaN. Otherwise, the comparison is performed
|
1558 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1559 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1560 | 00406dff | bellard | */
|
1561 | 00406dff | bellard | flag float32_eq_signaling( float32 a, float32 b ) |
1562 | 00406dff | bellard | { |
1563 | 00406dff | bellard | |
1564 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1565 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1566 | 00406dff | bellard | ) { |
1567 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1568 | 00406dff | bellard | return 0; |
1569 | 00406dff | bellard | } |
1570 | 00406dff | bellard | return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); |
1571 | 00406dff | bellard | |
1572 | 00406dff | bellard | } |
1573 | 00406dff | bellard | |
1574 | 00406dff | bellard | /*
|
1575 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1576 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is less than or
|
1577 | 00406dff | bellard | equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
|
1578 | 00406dff | bellard | cause an exception. Otherwise, the comparison is performed according to the
|
1579 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
1580 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1581 | 00406dff | bellard | */
|
1582 | 00406dff | bellard | flag float32_le_quiet( float32 a, float32 b ) |
1583 | 00406dff | bellard | { |
1584 | 00406dff | bellard | flag aSign, bSign; |
1585 | 00406dff | bellard | //int16 aExp, bExp;
|
1586 | 00406dff | bellard | |
1587 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1588 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1589 | 00406dff | bellard | ) { |
1590 | 00406dff | bellard | if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
|
1591 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1592 | 00406dff | bellard | } |
1593 | 00406dff | bellard | return 0; |
1594 | 00406dff | bellard | } |
1595 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1596 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1597 | 00406dff | bellard | if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); |
1598 | 00406dff | bellard | return ( a == b ) || ( aSign ^ ( a < b ) );
|
1599 | 00406dff | bellard | |
1600 | 00406dff | bellard | } |
1601 | 00406dff | bellard | |
1602 | 00406dff | bellard | /*
|
1603 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1604 | 00406dff | bellard | Returns 1 if the single-precision floating-point value `a' is less than
|
1605 | 00406dff | bellard | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
|
1606 | 00406dff | bellard | exception. Otherwise, the comparison is performed according to the IEC/IEEE
|
1607 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
1608 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1609 | 00406dff | bellard | */
|
1610 | 00406dff | bellard | flag float32_lt_quiet( float32 a, float32 b ) |
1611 | 00406dff | bellard | { |
1612 | 00406dff | bellard | flag aSign, bSign; |
1613 | 00406dff | bellard | |
1614 | 00406dff | bellard | if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) |
1615 | 00406dff | bellard | || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
|
1616 | 00406dff | bellard | ) { |
1617 | 00406dff | bellard | if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
|
1618 | 00406dff | bellard | float_raise( float_flag_invalid ); |
1619 | 00406dff | bellard | } |
1620 | 00406dff | bellard | return 0; |
1621 | 00406dff | bellard | } |
1622 | 00406dff | bellard | aSign = extractFloat32Sign( a ); |
1623 | 00406dff | bellard | bSign = extractFloat32Sign( b ); |
1624 | 00406dff | bellard | if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); |
1625 | 00406dff | bellard | return ( a != b ) && ( aSign ^ ( a < b ) );
|
1626 | 00406dff | bellard | |
1627 | 00406dff | bellard | } |
1628 | 00406dff | bellard | |
1629 | 00406dff | bellard | /*
|
1630 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1631 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1632 | 00406dff | bellard | `a' to the 32-bit two's complement integer format. The conversion is
|
1633 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
1634 | 00406dff | bellard | Arithmetic---which means in particular that the conversion is rounded
|
1635 | 00406dff | bellard | according to the current rounding mode. If `a' is a NaN, the largest
|
1636 | 00406dff | bellard | positive integer is returned. Otherwise, if the conversion overflows, the
|
1637 | 00406dff | bellard | largest integer with the same sign as `a' is returned.
|
1638 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1639 | 00406dff | bellard | */
|
1640 | 00406dff | bellard | int32 float64_to_int32( float64 a ) |
1641 | 00406dff | bellard | { |
1642 | 00406dff | bellard | flag aSign; |
1643 | 00406dff | bellard | int16 aExp, shiftCount; |
1644 | 00406dff | bellard | bits64 aSig; |
1645 | 00406dff | bellard | |
1646 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1647 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1648 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1649 | 00406dff | bellard | if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; |
1650 | 00406dff | bellard | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1651 | 00406dff | bellard | shiftCount = 0x42C - aExp;
|
1652 | 00406dff | bellard | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1653 | 00406dff | bellard | return roundAndPackInt32( aSign, aSig );
|
1654 | 00406dff | bellard | |
1655 | 00406dff | bellard | } |
1656 | 00406dff | bellard | |
1657 | 00406dff | bellard | /*
|
1658 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1659 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1660 | 00406dff | bellard | `a' to the 32-bit two's complement integer format. The conversion is
|
1661 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
1662 | 00406dff | bellard | Arithmetic, except that the conversion is always rounded toward zero. If
|
1663 | 00406dff | bellard | `a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
1664 | 00406dff | bellard | conversion overflows, the largest integer with the same sign as `a' is
|
1665 | 00406dff | bellard | returned.
|
1666 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1667 | 00406dff | bellard | */
|
1668 | 00406dff | bellard | int32 float64_to_int32_round_to_zero( float64 a ) |
1669 | 00406dff | bellard | { |
1670 | 00406dff | bellard | flag aSign; |
1671 | 00406dff | bellard | int16 aExp, shiftCount; |
1672 | 00406dff | bellard | bits64 aSig, savedASig; |
1673 | 00406dff | bellard | int32 z; |
1674 | 00406dff | bellard | |
1675 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1676 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1677 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1678 | 00406dff | bellard | shiftCount = 0x433 - aExp;
|
1679 | 00406dff | bellard | if ( shiftCount < 21 ) { |
1680 | 00406dff | bellard | if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; |
1681 | 00406dff | bellard | goto invalid;
|
1682 | 00406dff | bellard | } |
1683 | 00406dff | bellard | else if ( 52 < shiftCount ) { |
1684 | 00406dff | bellard | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
|
1685 | 00406dff | bellard | return 0; |
1686 | 00406dff | bellard | } |
1687 | 00406dff | bellard | aSig |= LIT64( 0x0010000000000000 );
|
1688 | 00406dff | bellard | savedASig = aSig; |
1689 | 00406dff | bellard | aSig >>= shiftCount; |
1690 | 00406dff | bellard | z = aSig; |
1691 | 00406dff | bellard | if ( aSign ) z = - z;
|
1692 | 00406dff | bellard | if ( ( z < 0 ) ^ aSign ) { |
1693 | 00406dff | bellard | invalid:
|
1694 | 00406dff | bellard | float_exception_flags |= float_flag_invalid; |
1695 | 00406dff | bellard | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1696 | 00406dff | bellard | } |
1697 | 00406dff | bellard | if ( ( aSig<<shiftCount ) != savedASig ) {
|
1698 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
1699 | 00406dff | bellard | } |
1700 | 00406dff | bellard | return z;
|
1701 | 00406dff | bellard | |
1702 | 00406dff | bellard | } |
1703 | 00406dff | bellard | |
1704 | 00406dff | bellard | /*
|
1705 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1706 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1707 | 00406dff | bellard | `a' to the 32-bit two's complement unsigned integer format. The conversion
|
1708 | 00406dff | bellard | is performed according to the IEC/IEEE Standard for Binary Floating-point
|
1709 | 00406dff | bellard | Arithmetic---which means in particular that the conversion is rounded
|
1710 | 00406dff | bellard | according to the current rounding mode. If `a' is a NaN, the largest
|
1711 | 00406dff | bellard | positive integer is returned. Otherwise, if the conversion overflows, the
|
1712 | 00406dff | bellard | largest positive integer is returned.
|
1713 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1714 | 00406dff | bellard | */
|
1715 | 00406dff | bellard | int32 float64_to_uint32( float64 a ) |
1716 | 00406dff | bellard | { |
1717 | 00406dff | bellard | flag aSign; |
1718 | 00406dff | bellard | int16 aExp, shiftCount; |
1719 | 00406dff | bellard | bits64 aSig; |
1720 | 00406dff | bellard | |
1721 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1722 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1723 | 00406dff | bellard | aSign = 0; //extractFloat64Sign( a ); |
1724 | 00406dff | bellard | //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
|
1725 | 00406dff | bellard | if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); |
1726 | 00406dff | bellard | shiftCount = 0x42C - aExp;
|
1727 | 00406dff | bellard | if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); |
1728 | 00406dff | bellard | return roundAndPackInt32( aSign, aSig );
|
1729 | 00406dff | bellard | } |
1730 | 00406dff | bellard | |
1731 | 00406dff | bellard | /*
|
1732 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1733 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1734 | 00406dff | bellard | `a' to the 32-bit two's complement integer format. The conversion is
|
1735 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
1736 | 00406dff | bellard | Arithmetic, except that the conversion is always rounded toward zero. If
|
1737 | 00406dff | bellard | `a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
1738 | 00406dff | bellard | conversion overflows, the largest positive integer is returned.
|
1739 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1740 | 00406dff | bellard | */
|
1741 | 00406dff | bellard | int32 float64_to_uint32_round_to_zero( float64 a ) |
1742 | 00406dff | bellard | { |
1743 | 00406dff | bellard | flag aSign; |
1744 | 00406dff | bellard | int16 aExp, shiftCount; |
1745 | 00406dff | bellard | bits64 aSig, savedASig; |
1746 | 00406dff | bellard | int32 z; |
1747 | 00406dff | bellard | |
1748 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1749 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1750 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1751 | 00406dff | bellard | shiftCount = 0x433 - aExp;
|
1752 | 00406dff | bellard | if ( shiftCount < 21 ) { |
1753 | 00406dff | bellard | if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; |
1754 | 00406dff | bellard | goto invalid;
|
1755 | 00406dff | bellard | } |
1756 | 00406dff | bellard | else if ( 52 < shiftCount ) { |
1757 | 00406dff | bellard | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
|
1758 | 00406dff | bellard | return 0; |
1759 | 00406dff | bellard | } |
1760 | 00406dff | bellard | aSig |= LIT64( 0x0010000000000000 );
|
1761 | 00406dff | bellard | savedASig = aSig; |
1762 | 00406dff | bellard | aSig >>= shiftCount; |
1763 | 00406dff | bellard | z = aSig; |
1764 | 00406dff | bellard | if ( aSign ) z = - z;
|
1765 | 00406dff | bellard | if ( ( z < 0 ) ^ aSign ) { |
1766 | 00406dff | bellard | invalid:
|
1767 | 00406dff | bellard | float_exception_flags |= float_flag_invalid; |
1768 | 00406dff | bellard | return aSign ? 0x80000000 : 0x7FFFFFFF; |
1769 | 00406dff | bellard | } |
1770 | 00406dff | bellard | if ( ( aSig<<shiftCount ) != savedASig ) {
|
1771 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
1772 | 00406dff | bellard | } |
1773 | 00406dff | bellard | return z;
|
1774 | 00406dff | bellard | } |
1775 | 00406dff | bellard | |
1776 | 00406dff | bellard | /*
|
1777 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1778 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1779 | 00406dff | bellard | `a' to the single-precision floating-point format. The conversion is
|
1780 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
1781 | 00406dff | bellard | Arithmetic.
|
1782 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1783 | 00406dff | bellard | */
|
1784 | 00406dff | bellard | float32 float64_to_float32( float64 a ) |
1785 | 00406dff | bellard | { |
1786 | 00406dff | bellard | flag aSign; |
1787 | 00406dff | bellard | int16 aExp; |
1788 | 00406dff | bellard | bits64 aSig; |
1789 | 00406dff | bellard | bits32 zSig; |
1790 | 00406dff | bellard | |
1791 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1792 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1793 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1794 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
1795 | 00406dff | bellard | if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); |
1796 | 00406dff | bellard | return packFloat32( aSign, 0xFF, 0 ); |
1797 | 00406dff | bellard | } |
1798 | 00406dff | bellard | shift64RightJamming( aSig, 22, &aSig );
|
1799 | 00406dff | bellard | zSig = aSig; |
1800 | 00406dff | bellard | if ( aExp || zSig ) {
|
1801 | 00406dff | bellard | zSig |= 0x40000000;
|
1802 | 00406dff | bellard | aExp -= 0x381;
|
1803 | 00406dff | bellard | } |
1804 | 00406dff | bellard | return roundAndPackFloat32( aSign, aExp, zSig );
|
1805 | 00406dff | bellard | |
1806 | 00406dff | bellard | } |
1807 | 00406dff | bellard | |
1808 | 00406dff | bellard | #ifdef FLOATX80
|
1809 | 00406dff | bellard | |
1810 | 00406dff | bellard | /*
|
1811 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1812 | 00406dff | bellard | Returns the result of converting the double-precision floating-point value
|
1813 | 00406dff | bellard | `a' to the extended double-precision floating-point format. The conversion
|
1814 | 00406dff | bellard | is performed according to the IEC/IEEE Standard for Binary Floating-point
|
1815 | 00406dff | bellard | Arithmetic.
|
1816 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1817 | 00406dff | bellard | */
|
1818 | 00406dff | bellard | floatx80 float64_to_floatx80( float64 a ) |
1819 | 00406dff | bellard | { |
1820 | 00406dff | bellard | flag aSign; |
1821 | 00406dff | bellard | int16 aExp; |
1822 | 00406dff | bellard | bits64 aSig; |
1823 | 00406dff | bellard | |
1824 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1825 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1826 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1827 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
1828 | 00406dff | bellard | if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); |
1829 | 00406dff | bellard | return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
1830 | 00406dff | bellard | } |
1831 | 00406dff | bellard | if ( aExp == 0 ) { |
1832 | 00406dff | bellard | if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); |
1833 | 00406dff | bellard | normalizeFloat64Subnormal( aSig, &aExp, &aSig ); |
1834 | 00406dff | bellard | } |
1835 | 00406dff | bellard | return
|
1836 | 00406dff | bellard | packFloatx80( |
1837 | 00406dff | bellard | aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); |
1838 | 00406dff | bellard | |
1839 | 00406dff | bellard | } |
1840 | 00406dff | bellard | |
1841 | 00406dff | bellard | #endif
|
1842 | 00406dff | bellard | |
1843 | 00406dff | bellard | /*
|
1844 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1845 | 00406dff | bellard | Rounds the double-precision floating-point value `a' to an integer, and
|
1846 | 00406dff | bellard | returns the result as a double-precision floating-point value. The
|
1847 | 00406dff | bellard | operation is performed according to the IEC/IEEE Standard for Binary
|
1848 | 00406dff | bellard | Floating-point Arithmetic.
|
1849 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1850 | 00406dff | bellard | */
|
1851 | 00406dff | bellard | float64 float64_round_to_int( float64 a ) |
1852 | 00406dff | bellard | { |
1853 | 00406dff | bellard | flag aSign; |
1854 | 00406dff | bellard | int16 aExp; |
1855 | 00406dff | bellard | bits64 lastBitMask, roundBitsMask; |
1856 | 00406dff | bellard | int8 roundingMode; |
1857 | 00406dff | bellard | float64 z; |
1858 | 00406dff | bellard | |
1859 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1860 | 00406dff | bellard | if ( 0x433 <= aExp ) { |
1861 | 00406dff | bellard | if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { |
1862 | 00406dff | bellard | return propagateFloat64NaN( a, a );
|
1863 | 00406dff | bellard | } |
1864 | 00406dff | bellard | return a;
|
1865 | 00406dff | bellard | } |
1866 | 00406dff | bellard | if ( aExp <= 0x3FE ) { |
1867 | 00406dff | bellard | if ( (bits64) ( a<<1 ) == 0 ) return a; |
1868 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
1869 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
1870 | 00406dff | bellard | switch ( float_rounding_mode ) {
|
1871 | 00406dff | bellard | case float_round_nearest_even:
|
1872 | 00406dff | bellard | if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { |
1873 | 00406dff | bellard | return packFloat64( aSign, 0x3FF, 0 ); |
1874 | 00406dff | bellard | } |
1875 | 00406dff | bellard | break;
|
1876 | 00406dff | bellard | case float_round_down:
|
1877 | 00406dff | bellard | return aSign ? LIT64( 0xBFF0000000000000 ) : 0; |
1878 | 00406dff | bellard | case float_round_up:
|
1879 | 00406dff | bellard | return
|
1880 | 00406dff | bellard | aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); |
1881 | 00406dff | bellard | } |
1882 | 00406dff | bellard | return packFloat64( aSign, 0, 0 ); |
1883 | 00406dff | bellard | } |
1884 | 00406dff | bellard | lastBitMask = 1;
|
1885 | 00406dff | bellard | lastBitMask <<= 0x433 - aExp;
|
1886 | 00406dff | bellard | roundBitsMask = lastBitMask - 1;
|
1887 | 00406dff | bellard | z = a; |
1888 | 00406dff | bellard | roundingMode = float_rounding_mode; |
1889 | 00406dff | bellard | if ( roundingMode == float_round_nearest_even ) {
|
1890 | 00406dff | bellard | z += lastBitMask>>1;
|
1891 | 00406dff | bellard | if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; |
1892 | 00406dff | bellard | } |
1893 | 00406dff | bellard | else if ( roundingMode != float_round_to_zero ) { |
1894 | 00406dff | bellard | if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
|
1895 | 00406dff | bellard | z += roundBitsMask; |
1896 | 00406dff | bellard | } |
1897 | 00406dff | bellard | } |
1898 | 00406dff | bellard | z &= ~ roundBitsMask; |
1899 | 00406dff | bellard | if ( z != a ) float_exception_flags |= float_flag_inexact;
|
1900 | 00406dff | bellard | return z;
|
1901 | 00406dff | bellard | |
1902 | 00406dff | bellard | } |
1903 | 00406dff | bellard | |
1904 | 00406dff | bellard | /*
|
1905 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1906 | 00406dff | bellard | Returns the result of adding the absolute values of the double-precision
|
1907 | 00406dff | bellard | floating-point values `a' and `b'. If `zSign' is true, the sum is negated
|
1908 | 00406dff | bellard | before being returned. `zSign' is ignored if the result is a NaN. The
|
1909 | 00406dff | bellard | addition is performed according to the IEC/IEEE Standard for Binary
|
1910 | 00406dff | bellard | Floating-point Arithmetic.
|
1911 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1912 | 00406dff | bellard | */
|
1913 | 00406dff | bellard | static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
|
1914 | 00406dff | bellard | { |
1915 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1916 | 00406dff | bellard | bits64 aSig, bSig, zSig; |
1917 | 00406dff | bellard | int16 expDiff; |
1918 | 00406dff | bellard | |
1919 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1920 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1921 | 00406dff | bellard | bSig = extractFloat64Frac( b ); |
1922 | 00406dff | bellard | bExp = extractFloat64Exp( b ); |
1923 | 00406dff | bellard | expDiff = aExp - bExp; |
1924 | 00406dff | bellard | aSig <<= 9;
|
1925 | 00406dff | bellard | bSig <<= 9;
|
1926 | 00406dff | bellard | if ( 0 < expDiff ) { |
1927 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
1928 | 00406dff | bellard | if ( aSig ) return propagateFloat64NaN( a, b ); |
1929 | 00406dff | bellard | return a;
|
1930 | 00406dff | bellard | } |
1931 | 00406dff | bellard | if ( bExp == 0 ) { |
1932 | 00406dff | bellard | --expDiff; |
1933 | 00406dff | bellard | } |
1934 | 00406dff | bellard | else {
|
1935 | 00406dff | bellard | bSig |= LIT64( 0x2000000000000000 );
|
1936 | 00406dff | bellard | } |
1937 | 00406dff | bellard | shift64RightJamming( bSig, expDiff, &bSig ); |
1938 | 00406dff | bellard | zExp = aExp; |
1939 | 00406dff | bellard | } |
1940 | 00406dff | bellard | else if ( expDiff < 0 ) { |
1941 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
1942 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
1943 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ); |
1944 | 00406dff | bellard | } |
1945 | 00406dff | bellard | if ( aExp == 0 ) { |
1946 | 00406dff | bellard | ++expDiff; |
1947 | 00406dff | bellard | } |
1948 | 00406dff | bellard | else {
|
1949 | 00406dff | bellard | aSig |= LIT64( 0x2000000000000000 );
|
1950 | 00406dff | bellard | } |
1951 | 00406dff | bellard | shift64RightJamming( aSig, - expDiff, &aSig ); |
1952 | 00406dff | bellard | zExp = bExp; |
1953 | 00406dff | bellard | } |
1954 | 00406dff | bellard | else {
|
1955 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
1956 | 00406dff | bellard | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); |
1957 | 00406dff | bellard | return a;
|
1958 | 00406dff | bellard | } |
1959 | 00406dff | bellard | if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); |
1960 | 00406dff | bellard | zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
|
1961 | 00406dff | bellard | zExp = aExp; |
1962 | 00406dff | bellard | goto roundAndPack;
|
1963 | 00406dff | bellard | } |
1964 | 00406dff | bellard | aSig |= LIT64( 0x2000000000000000 );
|
1965 | 00406dff | bellard | zSig = ( aSig + bSig )<<1;
|
1966 | 00406dff | bellard | --zExp; |
1967 | 00406dff | bellard | if ( (sbits64) zSig < 0 ) { |
1968 | 00406dff | bellard | zSig = aSig + bSig; |
1969 | 00406dff | bellard | ++zExp; |
1970 | 00406dff | bellard | } |
1971 | 00406dff | bellard | roundAndPack:
|
1972 | 00406dff | bellard | return roundAndPackFloat64( zSign, zExp, zSig );
|
1973 | 00406dff | bellard | |
1974 | 00406dff | bellard | } |
1975 | 00406dff | bellard | |
1976 | 00406dff | bellard | /*
|
1977 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1978 | 00406dff | bellard | Returns the result of subtracting the absolute values of the double-
|
1979 | 00406dff | bellard | precision floating-point values `a' and `b'. If `zSign' is true, the
|
1980 | 00406dff | bellard | difference is negated before being returned. `zSign' is ignored if the
|
1981 | 00406dff | bellard | result is a NaN. The subtraction is performed according to the IEC/IEEE
|
1982 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
1983 | 00406dff | bellard | -------------------------------------------------------------------------------
|
1984 | 00406dff | bellard | */
|
1985 | 00406dff | bellard | static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
|
1986 | 00406dff | bellard | { |
1987 | 00406dff | bellard | int16 aExp, bExp, zExp; |
1988 | 00406dff | bellard | bits64 aSig, bSig, zSig; |
1989 | 00406dff | bellard | int16 expDiff; |
1990 | 00406dff | bellard | |
1991 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
1992 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
1993 | 00406dff | bellard | bSig = extractFloat64Frac( b ); |
1994 | 00406dff | bellard | bExp = extractFloat64Exp( b ); |
1995 | 00406dff | bellard | expDiff = aExp - bExp; |
1996 | 00406dff | bellard | aSig <<= 10;
|
1997 | 00406dff | bellard | bSig <<= 10;
|
1998 | 00406dff | bellard | if ( 0 < expDiff ) goto aExpBigger; |
1999 | 00406dff | bellard | if ( expDiff < 0 ) goto bExpBigger; |
2000 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2001 | 00406dff | bellard | if ( aSig | bSig ) return propagateFloat64NaN( a, b ); |
2002 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2003 | 00406dff | bellard | return float64_default_nan;
|
2004 | 00406dff | bellard | } |
2005 | 00406dff | bellard | if ( aExp == 0 ) { |
2006 | 00406dff | bellard | aExp = 1;
|
2007 | 00406dff | bellard | bExp = 1;
|
2008 | 00406dff | bellard | } |
2009 | 00406dff | bellard | if ( bSig < aSig ) goto aBigger; |
2010 | 00406dff | bellard | if ( aSig < bSig ) goto bBigger; |
2011 | 00406dff | bellard | return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); |
2012 | 00406dff | bellard | bExpBigger:
|
2013 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
2014 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
2015 | 00406dff | bellard | return packFloat64( zSign ^ 1, 0x7FF, 0 ); |
2016 | 00406dff | bellard | } |
2017 | 00406dff | bellard | if ( aExp == 0 ) { |
2018 | 00406dff | bellard | ++expDiff; |
2019 | 00406dff | bellard | } |
2020 | 00406dff | bellard | else {
|
2021 | 00406dff | bellard | aSig |= LIT64( 0x4000000000000000 );
|
2022 | 00406dff | bellard | } |
2023 | 00406dff | bellard | shift64RightJamming( aSig, - expDiff, &aSig ); |
2024 | 00406dff | bellard | bSig |= LIT64( 0x4000000000000000 );
|
2025 | 00406dff | bellard | bBigger:
|
2026 | 00406dff | bellard | zSig = bSig - aSig; |
2027 | 00406dff | bellard | zExp = bExp; |
2028 | 00406dff | bellard | zSign ^= 1;
|
2029 | 00406dff | bellard | goto normalizeRoundAndPack;
|
2030 | 00406dff | bellard | aExpBigger:
|
2031 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2032 | 00406dff | bellard | if ( aSig ) return propagateFloat64NaN( a, b ); |
2033 | 00406dff | bellard | return a;
|
2034 | 00406dff | bellard | } |
2035 | 00406dff | bellard | if ( bExp == 0 ) { |
2036 | 00406dff | bellard | --expDiff; |
2037 | 00406dff | bellard | } |
2038 | 00406dff | bellard | else {
|
2039 | 00406dff | bellard | bSig |= LIT64( 0x4000000000000000 );
|
2040 | 00406dff | bellard | } |
2041 | 00406dff | bellard | shift64RightJamming( bSig, expDiff, &bSig ); |
2042 | 00406dff | bellard | aSig |= LIT64( 0x4000000000000000 );
|
2043 | 00406dff | bellard | aBigger:
|
2044 | 00406dff | bellard | zSig = aSig - bSig; |
2045 | 00406dff | bellard | zExp = aExp; |
2046 | 00406dff | bellard | normalizeRoundAndPack:
|
2047 | 00406dff | bellard | --zExp; |
2048 | 00406dff | bellard | return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
|
2049 | 00406dff | bellard | |
2050 | 00406dff | bellard | } |
2051 | 00406dff | bellard | |
2052 | 00406dff | bellard | /*
|
2053 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2054 | 00406dff | bellard | Returns the result of adding the double-precision floating-point values `a'
|
2055 | 00406dff | bellard | and `b'. The operation is performed according to the IEC/IEEE Standard for
|
2056 | 00406dff | bellard | Binary Floating-point Arithmetic.
|
2057 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2058 | 00406dff | bellard | */
|
2059 | 00406dff | bellard | float64 float64_add( float64 a, float64 b ) |
2060 | 00406dff | bellard | { |
2061 | 00406dff | bellard | flag aSign, bSign; |
2062 | 00406dff | bellard | |
2063 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2064 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2065 | 00406dff | bellard | if ( aSign == bSign ) {
|
2066 | 00406dff | bellard | return addFloat64Sigs( a, b, aSign );
|
2067 | 00406dff | bellard | } |
2068 | 00406dff | bellard | else {
|
2069 | 00406dff | bellard | return subFloat64Sigs( a, b, aSign );
|
2070 | 00406dff | bellard | } |
2071 | 00406dff | bellard | |
2072 | 00406dff | bellard | } |
2073 | 00406dff | bellard | |
2074 | 00406dff | bellard | /*
|
2075 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2076 | 00406dff | bellard | Returns the result of subtracting the double-precision floating-point values
|
2077 | 00406dff | bellard | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
2078 | 00406dff | bellard | for Binary Floating-point Arithmetic.
|
2079 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2080 | 00406dff | bellard | */
|
2081 | 00406dff | bellard | float64 float64_sub( float64 a, float64 b ) |
2082 | 00406dff | bellard | { |
2083 | 00406dff | bellard | flag aSign, bSign; |
2084 | 00406dff | bellard | |
2085 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2086 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2087 | 00406dff | bellard | if ( aSign == bSign ) {
|
2088 | 00406dff | bellard | return subFloat64Sigs( a, b, aSign );
|
2089 | 00406dff | bellard | } |
2090 | 00406dff | bellard | else {
|
2091 | 00406dff | bellard | return addFloat64Sigs( a, b, aSign );
|
2092 | 00406dff | bellard | } |
2093 | 00406dff | bellard | |
2094 | 00406dff | bellard | } |
2095 | 00406dff | bellard | |
2096 | 00406dff | bellard | /*
|
2097 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2098 | 00406dff | bellard | Returns the result of multiplying the double-precision floating-point values
|
2099 | 00406dff | bellard | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
2100 | 00406dff | bellard | for Binary Floating-point Arithmetic.
|
2101 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2102 | 00406dff | bellard | */
|
2103 | 00406dff | bellard | float64 float64_mul( float64 a, float64 b ) |
2104 | 00406dff | bellard | { |
2105 | 00406dff | bellard | flag aSign, bSign, zSign; |
2106 | 00406dff | bellard | int16 aExp, bExp, zExp; |
2107 | 00406dff | bellard | bits64 aSig, bSig, zSig0, zSig1; |
2108 | 00406dff | bellard | |
2109 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
2110 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
2111 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2112 | 00406dff | bellard | bSig = extractFloat64Frac( b ); |
2113 | 00406dff | bellard | bExp = extractFloat64Exp( b ); |
2114 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2115 | 00406dff | bellard | zSign = aSign ^ bSign; |
2116 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2117 | 00406dff | bellard | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { |
2118 | 00406dff | bellard | return propagateFloat64NaN( a, b );
|
2119 | 00406dff | bellard | } |
2120 | 00406dff | bellard | if ( ( bExp | bSig ) == 0 ) { |
2121 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2122 | 00406dff | bellard | return float64_default_nan;
|
2123 | 00406dff | bellard | } |
2124 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ); |
2125 | 00406dff | bellard | } |
2126 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
2127 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
2128 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
2129 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2130 | 00406dff | bellard | return float64_default_nan;
|
2131 | 00406dff | bellard | } |
2132 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ); |
2133 | 00406dff | bellard | } |
2134 | 00406dff | bellard | if ( aExp == 0 ) { |
2135 | 00406dff | bellard | if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); |
2136 | 00406dff | bellard | normalizeFloat64Subnormal( aSig, &aExp, &aSig ); |
2137 | 00406dff | bellard | } |
2138 | 00406dff | bellard | if ( bExp == 0 ) { |
2139 | 00406dff | bellard | if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); |
2140 | 00406dff | bellard | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
2141 | 00406dff | bellard | } |
2142 | 00406dff | bellard | zExp = aExp + bExp - 0x3FF;
|
2143 | 00406dff | bellard | aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; |
2144 | 00406dff | bellard | bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; |
2145 | 00406dff | bellard | mul64To128( aSig, bSig, &zSig0, &zSig1 ); |
2146 | 00406dff | bellard | zSig0 |= ( zSig1 != 0 );
|
2147 | 00406dff | bellard | if ( 0 <= (sbits64) ( zSig0<<1 ) ) { |
2148 | 00406dff | bellard | zSig0 <<= 1;
|
2149 | 00406dff | bellard | --zExp; |
2150 | 00406dff | bellard | } |
2151 | 00406dff | bellard | return roundAndPackFloat64( zSign, zExp, zSig0 );
|
2152 | 00406dff | bellard | |
2153 | 00406dff | bellard | } |
2154 | 00406dff | bellard | |
2155 | 00406dff | bellard | /*
|
2156 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2157 | 00406dff | bellard | Returns the result of dividing the double-precision floating-point value `a'
|
2158 | 00406dff | bellard | by the corresponding value `b'. The operation is performed according to
|
2159 | 00406dff | bellard | the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2160 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2161 | 00406dff | bellard | */
|
2162 | 00406dff | bellard | float64 float64_div( float64 a, float64 b ) |
2163 | 00406dff | bellard | { |
2164 | 00406dff | bellard | flag aSign, bSign, zSign; |
2165 | 00406dff | bellard | int16 aExp, bExp, zExp; |
2166 | 00406dff | bellard | bits64 aSig, bSig, zSig; |
2167 | 00406dff | bellard | bits64 rem0, rem1; |
2168 | 00406dff | bellard | bits64 term0, term1; |
2169 | 00406dff | bellard | |
2170 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
2171 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
2172 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2173 | 00406dff | bellard | bSig = extractFloat64Frac( b ); |
2174 | 00406dff | bellard | bExp = extractFloat64Exp( b ); |
2175 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2176 | 00406dff | bellard | zSign = aSign ^ bSign; |
2177 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2178 | 00406dff | bellard | if ( aSig ) return propagateFloat64NaN( a, b ); |
2179 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
2180 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
2181 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2182 | 00406dff | bellard | return float64_default_nan;
|
2183 | 00406dff | bellard | } |
2184 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ); |
2185 | 00406dff | bellard | } |
2186 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
2187 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
2188 | 00406dff | bellard | return packFloat64( zSign, 0, 0 ); |
2189 | 00406dff | bellard | } |
2190 | 00406dff | bellard | if ( bExp == 0 ) { |
2191 | 00406dff | bellard | if ( bSig == 0 ) { |
2192 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
2193 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2194 | 00406dff | bellard | return float64_default_nan;
|
2195 | 00406dff | bellard | } |
2196 | 00406dff | bellard | float_raise( float_flag_divbyzero ); |
2197 | 00406dff | bellard | return packFloat64( zSign, 0x7FF, 0 ); |
2198 | 00406dff | bellard | } |
2199 | 00406dff | bellard | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
2200 | 00406dff | bellard | } |
2201 | 00406dff | bellard | if ( aExp == 0 ) { |
2202 | 00406dff | bellard | if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); |
2203 | 00406dff | bellard | normalizeFloat64Subnormal( aSig, &aExp, &aSig ); |
2204 | 00406dff | bellard | } |
2205 | 00406dff | bellard | zExp = aExp - bExp + 0x3FD;
|
2206 | 00406dff | bellard | aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; |
2207 | 00406dff | bellard | bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; |
2208 | 00406dff | bellard | if ( bSig <= ( aSig + aSig ) ) {
|
2209 | 00406dff | bellard | aSig >>= 1;
|
2210 | 00406dff | bellard | ++zExp; |
2211 | 00406dff | bellard | } |
2212 | 00406dff | bellard | zSig = estimateDiv128To64( aSig, 0, bSig );
|
2213 | 00406dff | bellard | if ( ( zSig & 0x1FF ) <= 2 ) { |
2214 | 00406dff | bellard | mul64To128( bSig, zSig, &term0, &term1 ); |
2215 | 00406dff | bellard | sub128( aSig, 0, term0, term1, &rem0, &rem1 );
|
2216 | 00406dff | bellard | while ( (sbits64) rem0 < 0 ) { |
2217 | 00406dff | bellard | --zSig; |
2218 | 00406dff | bellard | add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
|
2219 | 00406dff | bellard | } |
2220 | 00406dff | bellard | zSig |= ( rem1 != 0 );
|
2221 | 00406dff | bellard | } |
2222 | 00406dff | bellard | return roundAndPackFloat64( zSign, zExp, zSig );
|
2223 | 00406dff | bellard | |
2224 | 00406dff | bellard | } |
2225 | 00406dff | bellard | |
2226 | 00406dff | bellard | /*
|
2227 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2228 | 00406dff | bellard | Returns the remainder of the double-precision floating-point value `a'
|
2229 | 00406dff | bellard | with respect to the corresponding value `b'. The operation is performed
|
2230 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2231 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2232 | 00406dff | bellard | */
|
2233 | 00406dff | bellard | float64 float64_rem( float64 a, float64 b ) |
2234 | 00406dff | bellard | { |
2235 | 00406dff | bellard | flag aSign, bSign, zSign; |
2236 | 00406dff | bellard | int16 aExp, bExp, expDiff; |
2237 | 00406dff | bellard | bits64 aSig, bSig; |
2238 | 00406dff | bellard | bits64 q, alternateASig; |
2239 | 00406dff | bellard | sbits64 sigMean; |
2240 | 00406dff | bellard | |
2241 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
2242 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
2243 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2244 | 00406dff | bellard | bSig = extractFloat64Frac( b ); |
2245 | 00406dff | bellard | bExp = extractFloat64Exp( b ); |
2246 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2247 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2248 | 00406dff | bellard | if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { |
2249 | 00406dff | bellard | return propagateFloat64NaN( a, b );
|
2250 | 00406dff | bellard | } |
2251 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2252 | 00406dff | bellard | return float64_default_nan;
|
2253 | 00406dff | bellard | } |
2254 | 00406dff | bellard | if ( bExp == 0x7FF ) { |
2255 | 00406dff | bellard | if ( bSig ) return propagateFloat64NaN( a, b ); |
2256 | 00406dff | bellard | return a;
|
2257 | 00406dff | bellard | } |
2258 | 00406dff | bellard | if ( bExp == 0 ) { |
2259 | 00406dff | bellard | if ( bSig == 0 ) { |
2260 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2261 | 00406dff | bellard | return float64_default_nan;
|
2262 | 00406dff | bellard | } |
2263 | 00406dff | bellard | normalizeFloat64Subnormal( bSig, &bExp, &bSig ); |
2264 | 00406dff | bellard | } |
2265 | 00406dff | bellard | if ( aExp == 0 ) { |
2266 | 00406dff | bellard | if ( aSig == 0 ) return a; |
2267 | 00406dff | bellard | normalizeFloat64Subnormal( aSig, &aExp, &aSig ); |
2268 | 00406dff | bellard | } |
2269 | 00406dff | bellard | expDiff = aExp - bExp; |
2270 | 00406dff | bellard | aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; |
2271 | 00406dff | bellard | bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; |
2272 | 00406dff | bellard | if ( expDiff < 0 ) { |
2273 | 00406dff | bellard | if ( expDiff < -1 ) return a; |
2274 | 00406dff | bellard | aSig >>= 1;
|
2275 | 00406dff | bellard | } |
2276 | 00406dff | bellard | q = ( bSig <= aSig ); |
2277 | 00406dff | bellard | if ( q ) aSig -= bSig;
|
2278 | 00406dff | bellard | expDiff -= 64;
|
2279 | 00406dff | bellard | while ( 0 < expDiff ) { |
2280 | 00406dff | bellard | q = estimateDiv128To64( aSig, 0, bSig );
|
2281 | 00406dff | bellard | q = ( 2 < q ) ? q - 2 : 0; |
2282 | 00406dff | bellard | aSig = - ( ( bSig>>2 ) * q );
|
2283 | 00406dff | bellard | expDiff -= 62;
|
2284 | 00406dff | bellard | } |
2285 | 00406dff | bellard | expDiff += 64;
|
2286 | 00406dff | bellard | if ( 0 < expDiff ) { |
2287 | 00406dff | bellard | q = estimateDiv128To64( aSig, 0, bSig );
|
2288 | 00406dff | bellard | q = ( 2 < q ) ? q - 2 : 0; |
2289 | 00406dff | bellard | q >>= 64 - expDiff;
|
2290 | 00406dff | bellard | bSig >>= 2;
|
2291 | 00406dff | bellard | aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; |
2292 | 00406dff | bellard | } |
2293 | 00406dff | bellard | else {
|
2294 | 00406dff | bellard | aSig >>= 2;
|
2295 | 00406dff | bellard | bSig >>= 2;
|
2296 | 00406dff | bellard | } |
2297 | 00406dff | bellard | do {
|
2298 | 00406dff | bellard | alternateASig = aSig; |
2299 | 00406dff | bellard | ++q; |
2300 | 00406dff | bellard | aSig -= bSig; |
2301 | 00406dff | bellard | } while ( 0 <= (sbits64) aSig ); |
2302 | 00406dff | bellard | sigMean = aSig + alternateASig; |
2303 | 00406dff | bellard | if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { |
2304 | 00406dff | bellard | aSig = alternateASig; |
2305 | 00406dff | bellard | } |
2306 | 00406dff | bellard | zSign = ( (sbits64) aSig < 0 );
|
2307 | 00406dff | bellard | if ( zSign ) aSig = - aSig;
|
2308 | 00406dff | bellard | return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
|
2309 | 00406dff | bellard | |
2310 | 00406dff | bellard | } |
2311 | 00406dff | bellard | |
2312 | 00406dff | bellard | /*
|
2313 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2314 | 00406dff | bellard | Returns the square root of the double-precision floating-point value `a'.
|
2315 | 00406dff | bellard | The operation is performed according to the IEC/IEEE Standard for Binary
|
2316 | 00406dff | bellard | Floating-point Arithmetic.
|
2317 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2318 | 00406dff | bellard | */
|
2319 | 00406dff | bellard | float64 float64_sqrt( float64 a ) |
2320 | 00406dff | bellard | { |
2321 | 00406dff | bellard | flag aSign; |
2322 | 00406dff | bellard | int16 aExp, zExp; |
2323 | 00406dff | bellard | bits64 aSig, zSig; |
2324 | 00406dff | bellard | bits64 rem0, rem1, term0, term1; //, shiftedRem;
|
2325 | 00406dff | bellard | //float64 z;
|
2326 | 00406dff | bellard | |
2327 | 00406dff | bellard | aSig = extractFloat64Frac( a ); |
2328 | 00406dff | bellard | aExp = extractFloat64Exp( a ); |
2329 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2330 | 00406dff | bellard | if ( aExp == 0x7FF ) { |
2331 | 00406dff | bellard | if ( aSig ) return propagateFloat64NaN( a, a ); |
2332 | 00406dff | bellard | if ( ! aSign ) return a; |
2333 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2334 | 00406dff | bellard | return float64_default_nan;
|
2335 | 00406dff | bellard | } |
2336 | 00406dff | bellard | if ( aSign ) {
|
2337 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) return a; |
2338 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2339 | 00406dff | bellard | return float64_default_nan;
|
2340 | 00406dff | bellard | } |
2341 | 00406dff | bellard | if ( aExp == 0 ) { |
2342 | 00406dff | bellard | if ( aSig == 0 ) return 0; |
2343 | 00406dff | bellard | normalizeFloat64Subnormal( aSig, &aExp, &aSig ); |
2344 | 00406dff | bellard | } |
2345 | 00406dff | bellard | zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; |
2346 | 00406dff | bellard | aSig |= LIT64( 0x0010000000000000 );
|
2347 | 00406dff | bellard | zSig = estimateSqrt32( aExp, aSig>>21 );
|
2348 | 00406dff | bellard | zSig <<= 31;
|
2349 | 00406dff | bellard | aSig <<= 9 - ( aExp & 1 ); |
2350 | 00406dff | bellard | zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2; |
2351 | 00406dff | bellard | if ( ( zSig & 0x3FF ) <= 5 ) { |
2352 | 00406dff | bellard | if ( zSig < 2 ) { |
2353 | 00406dff | bellard | zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
|
2354 | 00406dff | bellard | } |
2355 | 00406dff | bellard | else {
|
2356 | 00406dff | bellard | aSig <<= 2;
|
2357 | 00406dff | bellard | mul64To128( zSig, zSig, &term0, &term1 ); |
2358 | 00406dff | bellard | sub128( aSig, 0, term0, term1, &rem0, &rem1 );
|
2359 | 00406dff | bellard | while ( (sbits64) rem0 < 0 ) { |
2360 | 00406dff | bellard | --zSig; |
2361 | 00406dff | bellard | shortShift128Left( 0, zSig, 1, &term0, &term1 ); |
2362 | 00406dff | bellard | term1 |= 1;
|
2363 | 00406dff | bellard | add128( rem0, rem1, term0, term1, &rem0, &rem1 ); |
2364 | 00406dff | bellard | } |
2365 | 00406dff | bellard | zSig |= ( ( rem0 | rem1 ) != 0 );
|
2366 | 00406dff | bellard | } |
2367 | 00406dff | bellard | } |
2368 | 00406dff | bellard | shift64RightJamming( zSig, 1, &zSig );
|
2369 | 00406dff | bellard | return roundAndPackFloat64( 0, zExp, zSig ); |
2370 | 00406dff | bellard | |
2371 | 00406dff | bellard | } |
2372 | 00406dff | bellard | |
2373 | 00406dff | bellard | /*
|
2374 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2375 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is equal to the
|
2376 | 00406dff | bellard | corresponding value `b', and 0 otherwise. The comparison is performed
|
2377 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2378 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2379 | 00406dff | bellard | */
|
2380 | 00406dff | bellard | flag float64_eq( float64 a, float64 b ) |
2381 | 00406dff | bellard | { |
2382 | 00406dff | bellard | |
2383 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2384 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2385 | 00406dff | bellard | ) { |
2386 | 00406dff | bellard | if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
|
2387 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2388 | 00406dff | bellard | } |
2389 | 00406dff | bellard | return 0; |
2390 | 00406dff | bellard | } |
2391 | 00406dff | bellard | return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); |
2392 | 00406dff | bellard | |
2393 | 00406dff | bellard | } |
2394 | 00406dff | bellard | |
2395 | 00406dff | bellard | /*
|
2396 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2397 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is less than or
|
2398 | 00406dff | bellard | equal to the corresponding value `b', and 0 otherwise. The comparison is
|
2399 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
2400 | 00406dff | bellard | Arithmetic.
|
2401 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2402 | 00406dff | bellard | */
|
2403 | 00406dff | bellard | flag float64_le( float64 a, float64 b ) |
2404 | 00406dff | bellard | { |
2405 | 00406dff | bellard | flag aSign, bSign; |
2406 | 00406dff | bellard | |
2407 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2408 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2409 | 00406dff | bellard | ) { |
2410 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2411 | 00406dff | bellard | return 0; |
2412 | 00406dff | bellard | } |
2413 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2414 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2415 | 00406dff | bellard | if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); |
2416 | 00406dff | bellard | return ( a == b ) || ( aSign ^ ( a < b ) );
|
2417 | 00406dff | bellard | |
2418 | 00406dff | bellard | } |
2419 | 00406dff | bellard | |
2420 | 00406dff | bellard | /*
|
2421 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2422 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is less than
|
2423 | 00406dff | bellard | the corresponding value `b', and 0 otherwise. The comparison is performed
|
2424 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2425 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2426 | 00406dff | bellard | */
|
2427 | 00406dff | bellard | flag float64_lt( float64 a, float64 b ) |
2428 | 00406dff | bellard | { |
2429 | 00406dff | bellard | flag aSign, bSign; |
2430 | 00406dff | bellard | |
2431 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2432 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2433 | 00406dff | bellard | ) { |
2434 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2435 | 00406dff | bellard | return 0; |
2436 | 00406dff | bellard | } |
2437 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2438 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2439 | 00406dff | bellard | if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); |
2440 | 00406dff | bellard | return ( a != b ) && ( aSign ^ ( a < b ) );
|
2441 | 00406dff | bellard | |
2442 | 00406dff | bellard | } |
2443 | 00406dff | bellard | |
2444 | 00406dff | bellard | /*
|
2445 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2446 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is equal to the
|
2447 | 00406dff | bellard | corresponding value `b', and 0 otherwise. The invalid exception is raised
|
2448 | 00406dff | bellard | if either operand is a NaN. Otherwise, the comparison is performed
|
2449 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2450 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2451 | 00406dff | bellard | */
|
2452 | 00406dff | bellard | flag float64_eq_signaling( float64 a, float64 b ) |
2453 | 00406dff | bellard | { |
2454 | 00406dff | bellard | |
2455 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2456 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2457 | 00406dff | bellard | ) { |
2458 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2459 | 00406dff | bellard | return 0; |
2460 | 00406dff | bellard | } |
2461 | 00406dff | bellard | return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); |
2462 | 00406dff | bellard | |
2463 | 00406dff | bellard | } |
2464 | 00406dff | bellard | |
2465 | 00406dff | bellard | /*
|
2466 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2467 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is less than or
|
2468 | 00406dff | bellard | equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
|
2469 | 00406dff | bellard | cause an exception. Otherwise, the comparison is performed according to the
|
2470 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2471 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2472 | 00406dff | bellard | */
|
2473 | 00406dff | bellard | flag float64_le_quiet( float64 a, float64 b ) |
2474 | 00406dff | bellard | { |
2475 | 00406dff | bellard | flag aSign, bSign; |
2476 | 00406dff | bellard | //int16 aExp, bExp;
|
2477 | 00406dff | bellard | |
2478 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2479 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2480 | 00406dff | bellard | ) { |
2481 | 00406dff | bellard | if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
|
2482 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2483 | 00406dff | bellard | } |
2484 | 00406dff | bellard | return 0; |
2485 | 00406dff | bellard | } |
2486 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2487 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2488 | 00406dff | bellard | if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); |
2489 | 00406dff | bellard | return ( a == b ) || ( aSign ^ ( a < b ) );
|
2490 | 00406dff | bellard | |
2491 | 00406dff | bellard | } |
2492 | 00406dff | bellard | |
2493 | 00406dff | bellard | /*
|
2494 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2495 | 00406dff | bellard | Returns 1 if the double-precision floating-point value `a' is less than
|
2496 | 00406dff | bellard | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
|
2497 | 00406dff | bellard | exception. Otherwise, the comparison is performed according to the IEC/IEEE
|
2498 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
2499 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2500 | 00406dff | bellard | */
|
2501 | 00406dff | bellard | flag float64_lt_quiet( float64 a, float64 b ) |
2502 | 00406dff | bellard | { |
2503 | 00406dff | bellard | flag aSign, bSign; |
2504 | 00406dff | bellard | |
2505 | 00406dff | bellard | if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) |
2506 | 00406dff | bellard | || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
|
2507 | 00406dff | bellard | ) { |
2508 | 00406dff | bellard | if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
|
2509 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2510 | 00406dff | bellard | } |
2511 | 00406dff | bellard | return 0; |
2512 | 00406dff | bellard | } |
2513 | 00406dff | bellard | aSign = extractFloat64Sign( a ); |
2514 | 00406dff | bellard | bSign = extractFloat64Sign( b ); |
2515 | 00406dff | bellard | if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); |
2516 | 00406dff | bellard | return ( a != b ) && ( aSign ^ ( a < b ) );
|
2517 | 00406dff | bellard | |
2518 | 00406dff | bellard | } |
2519 | 00406dff | bellard | |
2520 | 00406dff | bellard | #ifdef FLOATX80
|
2521 | 00406dff | bellard | |
2522 | 00406dff | bellard | /*
|
2523 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2524 | 00406dff | bellard | Returns the result of converting the extended double-precision floating-
|
2525 | 00406dff | bellard | point value `a' to the 32-bit two's complement integer format. The
|
2526 | 00406dff | bellard | conversion is performed according to the IEC/IEEE Standard for Binary
|
2527 | 00406dff | bellard | Floating-point Arithmetic---which means in particular that the conversion
|
2528 | 00406dff | bellard | is rounded according to the current rounding mode. If `a' is a NaN, the
|
2529 | 00406dff | bellard | largest positive integer is returned. Otherwise, if the conversion
|
2530 | 00406dff | bellard | overflows, the largest integer with the same sign as `a' is returned.
|
2531 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2532 | 00406dff | bellard | */
|
2533 | 00406dff | bellard | int32 floatx80_to_int32( floatx80 a ) |
2534 | 00406dff | bellard | { |
2535 | 00406dff | bellard | flag aSign; |
2536 | 00406dff | bellard | int32 aExp, shiftCount; |
2537 | 00406dff | bellard | bits64 aSig; |
2538 | 00406dff | bellard | |
2539 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2540 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2541 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2542 | 00406dff | bellard | if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; |
2543 | 00406dff | bellard | shiftCount = 0x4037 - aExp;
|
2544 | 00406dff | bellard | if ( shiftCount <= 0 ) shiftCount = 1; |
2545 | 00406dff | bellard | shift64RightJamming( aSig, shiftCount, &aSig ); |
2546 | 00406dff | bellard | return roundAndPackInt32( aSign, aSig );
|
2547 | 00406dff | bellard | |
2548 | 00406dff | bellard | } |
2549 | 00406dff | bellard | |
2550 | 00406dff | bellard | /*
|
2551 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2552 | 00406dff | bellard | Returns the result of converting the extended double-precision floating-
|
2553 | 00406dff | bellard | point value `a' to the 32-bit two's complement integer format. The
|
2554 | 00406dff | bellard | conversion is performed according to the IEC/IEEE Standard for Binary
|
2555 | 00406dff | bellard | Floating-point Arithmetic, except that the conversion is always rounded
|
2556 | 00406dff | bellard | toward zero. If `a' is a NaN, the largest positive integer is returned.
|
2557 | 00406dff | bellard | Otherwise, if the conversion overflows, the largest integer with the same
|
2558 | 00406dff | bellard | sign as `a' is returned.
|
2559 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2560 | 00406dff | bellard | */
|
2561 | 00406dff | bellard | int32 floatx80_to_int32_round_to_zero( floatx80 a ) |
2562 | 00406dff | bellard | { |
2563 | 00406dff | bellard | flag aSign; |
2564 | 00406dff | bellard | int32 aExp, shiftCount; |
2565 | 00406dff | bellard | bits64 aSig, savedASig; |
2566 | 00406dff | bellard | int32 z; |
2567 | 00406dff | bellard | |
2568 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2569 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2570 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2571 | 00406dff | bellard | shiftCount = 0x403E - aExp;
|
2572 | 00406dff | bellard | if ( shiftCount < 32 ) { |
2573 | 00406dff | bellard | if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; |
2574 | 00406dff | bellard | goto invalid;
|
2575 | 00406dff | bellard | } |
2576 | 00406dff | bellard | else if ( 63 < shiftCount ) { |
2577 | 00406dff | bellard | if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
|
2578 | 00406dff | bellard | return 0; |
2579 | 00406dff | bellard | } |
2580 | 00406dff | bellard | savedASig = aSig; |
2581 | 00406dff | bellard | aSig >>= shiftCount; |
2582 | 00406dff | bellard | z = aSig; |
2583 | 00406dff | bellard | if ( aSign ) z = - z;
|
2584 | 00406dff | bellard | if ( ( z < 0 ) ^ aSign ) { |
2585 | 00406dff | bellard | invalid:
|
2586 | 00406dff | bellard | float_exception_flags |= float_flag_invalid; |
2587 | 00406dff | bellard | return aSign ? 0x80000000 : 0x7FFFFFFF; |
2588 | 00406dff | bellard | } |
2589 | 00406dff | bellard | if ( ( aSig<<shiftCount ) != savedASig ) {
|
2590 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
2591 | 00406dff | bellard | } |
2592 | 00406dff | bellard | return z;
|
2593 | 00406dff | bellard | |
2594 | 00406dff | bellard | } |
2595 | 00406dff | bellard | |
2596 | 00406dff | bellard | /*
|
2597 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2598 | 00406dff | bellard | Returns the result of converting the extended double-precision floating-
|
2599 | 00406dff | bellard | point value `a' to the single-precision floating-point format. The
|
2600 | 00406dff | bellard | conversion is performed according to the IEC/IEEE Standard for Binary
|
2601 | 00406dff | bellard | Floating-point Arithmetic.
|
2602 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2603 | 00406dff | bellard | */
|
2604 | 00406dff | bellard | float32 floatx80_to_float32( floatx80 a ) |
2605 | 00406dff | bellard | { |
2606 | 00406dff | bellard | flag aSign; |
2607 | 00406dff | bellard | int32 aExp; |
2608 | 00406dff | bellard | bits64 aSig; |
2609 | 00406dff | bellard | |
2610 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2611 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2612 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2613 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2614 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) ) { |
2615 | 00406dff | bellard | return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
|
2616 | 00406dff | bellard | } |
2617 | 00406dff | bellard | return packFloat32( aSign, 0xFF, 0 ); |
2618 | 00406dff | bellard | } |
2619 | 00406dff | bellard | shift64RightJamming( aSig, 33, &aSig );
|
2620 | 00406dff | bellard | if ( aExp || aSig ) aExp -= 0x3F81; |
2621 | 00406dff | bellard | return roundAndPackFloat32( aSign, aExp, aSig );
|
2622 | 00406dff | bellard | |
2623 | 00406dff | bellard | } |
2624 | 00406dff | bellard | |
2625 | 00406dff | bellard | /*
|
2626 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2627 | 00406dff | bellard | Returns the result of converting the extended double-precision floating-
|
2628 | 00406dff | bellard | point value `a' to the double-precision floating-point format. The
|
2629 | 00406dff | bellard | conversion is performed according to the IEC/IEEE Standard for Binary
|
2630 | 00406dff | bellard | Floating-point Arithmetic.
|
2631 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2632 | 00406dff | bellard | */
|
2633 | 00406dff | bellard | float64 floatx80_to_float64( floatx80 a ) |
2634 | 00406dff | bellard | { |
2635 | 00406dff | bellard | flag aSign; |
2636 | 00406dff | bellard | int32 aExp; |
2637 | 00406dff | bellard | bits64 aSig, zSig; |
2638 | 00406dff | bellard | |
2639 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2640 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2641 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2642 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2643 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) ) { |
2644 | 00406dff | bellard | return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
|
2645 | 00406dff | bellard | } |
2646 | 00406dff | bellard | return packFloat64( aSign, 0x7FF, 0 ); |
2647 | 00406dff | bellard | } |
2648 | 00406dff | bellard | shift64RightJamming( aSig, 1, &zSig );
|
2649 | 00406dff | bellard | if ( aExp || aSig ) aExp -= 0x3C01; |
2650 | 00406dff | bellard | return roundAndPackFloat64( aSign, aExp, zSig );
|
2651 | 00406dff | bellard | |
2652 | 00406dff | bellard | } |
2653 | 00406dff | bellard | |
2654 | 00406dff | bellard | /*
|
2655 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2656 | 00406dff | bellard | Rounds the extended double-precision floating-point value `a' to an integer,
|
2657 | 00406dff | bellard | and returns the result as an extended quadruple-precision floating-point
|
2658 | 00406dff | bellard | value. The operation is performed according to the IEC/IEEE Standard for
|
2659 | 00406dff | bellard | Binary Floating-point Arithmetic.
|
2660 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2661 | 00406dff | bellard | */
|
2662 | 00406dff | bellard | floatx80 floatx80_round_to_int( floatx80 a ) |
2663 | 00406dff | bellard | { |
2664 | 00406dff | bellard | flag aSign; |
2665 | 00406dff | bellard | int32 aExp; |
2666 | 00406dff | bellard | bits64 lastBitMask, roundBitsMask; |
2667 | 00406dff | bellard | int8 roundingMode; |
2668 | 00406dff | bellard | floatx80 z; |
2669 | 00406dff | bellard | |
2670 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2671 | 00406dff | bellard | if ( 0x403E <= aExp ) { |
2672 | 00406dff | bellard | if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { |
2673 | 00406dff | bellard | return propagateFloatx80NaN( a, a );
|
2674 | 00406dff | bellard | } |
2675 | 00406dff | bellard | return a;
|
2676 | 00406dff | bellard | } |
2677 | 00406dff | bellard | if ( aExp <= 0x3FFE ) { |
2678 | 00406dff | bellard | if ( ( aExp == 0 ) |
2679 | 00406dff | bellard | && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { |
2680 | 00406dff | bellard | return a;
|
2681 | 00406dff | bellard | } |
2682 | 00406dff | bellard | float_exception_flags |= float_flag_inexact; |
2683 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2684 | 00406dff | bellard | switch ( float_rounding_mode ) {
|
2685 | 00406dff | bellard | case float_round_nearest_even:
|
2686 | 00406dff | bellard | if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) |
2687 | 00406dff | bellard | ) { |
2688 | 00406dff | bellard | return
|
2689 | 00406dff | bellard | packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); |
2690 | 00406dff | bellard | } |
2691 | 00406dff | bellard | break;
|
2692 | 00406dff | bellard | case float_round_down:
|
2693 | 00406dff | bellard | return
|
2694 | 00406dff | bellard | aSign ? |
2695 | 00406dff | bellard | packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) |
2696 | 00406dff | bellard | : packFloatx80( 0, 0, 0 ); |
2697 | 00406dff | bellard | case float_round_up:
|
2698 | 00406dff | bellard | return
|
2699 | 00406dff | bellard | aSign ? packFloatx80( 1, 0, 0 ) |
2700 | 00406dff | bellard | : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); |
2701 | 00406dff | bellard | } |
2702 | 00406dff | bellard | return packFloatx80( aSign, 0, 0 ); |
2703 | 00406dff | bellard | } |
2704 | 00406dff | bellard | lastBitMask = 1;
|
2705 | 00406dff | bellard | lastBitMask <<= 0x403E - aExp;
|
2706 | 00406dff | bellard | roundBitsMask = lastBitMask - 1;
|
2707 | 00406dff | bellard | z = a; |
2708 | 00406dff | bellard | roundingMode = float_rounding_mode; |
2709 | 00406dff | bellard | if ( roundingMode == float_round_nearest_even ) {
|
2710 | 00406dff | bellard | z.low += lastBitMask>>1;
|
2711 | 00406dff | bellard | if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; |
2712 | 00406dff | bellard | } |
2713 | 00406dff | bellard | else if ( roundingMode != float_round_to_zero ) { |
2714 | 00406dff | bellard | if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
|
2715 | 00406dff | bellard | z.low += roundBitsMask; |
2716 | 00406dff | bellard | } |
2717 | 00406dff | bellard | } |
2718 | 00406dff | bellard | z.low &= ~ roundBitsMask; |
2719 | 00406dff | bellard | if ( z.low == 0 ) { |
2720 | 00406dff | bellard | ++z.high; |
2721 | 00406dff | bellard | z.low = LIT64( 0x8000000000000000 );
|
2722 | 00406dff | bellard | } |
2723 | 00406dff | bellard | if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
|
2724 | 00406dff | bellard | return z;
|
2725 | 00406dff | bellard | |
2726 | 00406dff | bellard | } |
2727 | 00406dff | bellard | |
2728 | 00406dff | bellard | /*
|
2729 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2730 | 00406dff | bellard | Returns the result of adding the absolute values of the extended double-
|
2731 | 00406dff | bellard | precision floating-point values `a' and `b'. If `zSign' is true, the sum is
|
2732 | 00406dff | bellard | negated before being returned. `zSign' is ignored if the result is a NaN.
|
2733 | 00406dff | bellard | The addition is performed according to the IEC/IEEE Standard for Binary
|
2734 | 00406dff | bellard | Floating-point Arithmetic.
|
2735 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2736 | 00406dff | bellard | */
|
2737 | 00406dff | bellard | static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
2738 | 00406dff | bellard | { |
2739 | 00406dff | bellard | int32 aExp, bExp, zExp; |
2740 | 00406dff | bellard | bits64 aSig, bSig, zSig0, zSig1; |
2741 | 00406dff | bellard | int32 expDiff; |
2742 | 00406dff | bellard | |
2743 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2744 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2745 | 00406dff | bellard | bSig = extractFloatx80Frac( b ); |
2746 | 00406dff | bellard | bExp = extractFloatx80Exp( b ); |
2747 | 00406dff | bellard | expDiff = aExp - bExp; |
2748 | 00406dff | bellard | if ( 0 < expDiff ) { |
2749 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2750 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2751 | 00406dff | bellard | return a;
|
2752 | 00406dff | bellard | } |
2753 | 00406dff | bellard | if ( bExp == 0 ) --expDiff; |
2754 | 00406dff | bellard | shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
2755 | 00406dff | bellard | zExp = aExp; |
2756 | 00406dff | bellard | } |
2757 | 00406dff | bellard | else if ( expDiff < 0 ) { |
2758 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
2759 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2760 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
2761 | 00406dff | bellard | } |
2762 | 00406dff | bellard | if ( aExp == 0 ) ++expDiff; |
2763 | 00406dff | bellard | shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
2764 | 00406dff | bellard | zExp = bExp; |
2765 | 00406dff | bellard | } |
2766 | 00406dff | bellard | else {
|
2767 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2768 | 00406dff | bellard | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { |
2769 | 00406dff | bellard | return propagateFloatx80NaN( a, b );
|
2770 | 00406dff | bellard | } |
2771 | 00406dff | bellard | return a;
|
2772 | 00406dff | bellard | } |
2773 | 00406dff | bellard | zSig1 = 0;
|
2774 | 00406dff | bellard | zSig0 = aSig + bSig; |
2775 | 00406dff | bellard | if ( aExp == 0 ) { |
2776 | 00406dff | bellard | normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); |
2777 | 00406dff | bellard | goto roundAndPack;
|
2778 | 00406dff | bellard | } |
2779 | 00406dff | bellard | zExp = aExp; |
2780 | 00406dff | bellard | goto shiftRight1;
|
2781 | 00406dff | bellard | } |
2782 | 00406dff | bellard | |
2783 | 00406dff | bellard | zSig0 = aSig + bSig; |
2784 | 00406dff | bellard | |
2785 | 00406dff | bellard | if ( (sbits64) zSig0 < 0 ) goto roundAndPack; |
2786 | 00406dff | bellard | shiftRight1:
|
2787 | 00406dff | bellard | shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
|
2788 | 00406dff | bellard | zSig0 |= LIT64( 0x8000000000000000 );
|
2789 | 00406dff | bellard | ++zExp; |
2790 | 00406dff | bellard | roundAndPack:
|
2791 | 00406dff | bellard | return
|
2792 | 00406dff | bellard | roundAndPackFloatx80( |
2793 | 00406dff | bellard | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); |
2794 | 00406dff | bellard | |
2795 | 00406dff | bellard | } |
2796 | 00406dff | bellard | |
2797 | 00406dff | bellard | /*
|
2798 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2799 | 00406dff | bellard | Returns the result of subtracting the absolute values of the extended
|
2800 | 00406dff | bellard | double-precision floating-point values `a' and `b'. If `zSign' is true,
|
2801 | 00406dff | bellard | the difference is negated before being returned. `zSign' is ignored if the
|
2802 | 00406dff | bellard | result is a NaN. The subtraction is performed according to the IEC/IEEE
|
2803 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
2804 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2805 | 00406dff | bellard | */
|
2806 | 00406dff | bellard | static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
2807 | 00406dff | bellard | { |
2808 | 00406dff | bellard | int32 aExp, bExp, zExp; |
2809 | 00406dff | bellard | bits64 aSig, bSig, zSig0, zSig1; |
2810 | 00406dff | bellard | int32 expDiff; |
2811 | 00406dff | bellard | floatx80 z; |
2812 | 00406dff | bellard | |
2813 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2814 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2815 | 00406dff | bellard | bSig = extractFloatx80Frac( b ); |
2816 | 00406dff | bellard | bExp = extractFloatx80Exp( b ); |
2817 | 00406dff | bellard | expDiff = aExp - bExp; |
2818 | 00406dff | bellard | if ( 0 < expDiff ) goto aExpBigger; |
2819 | 00406dff | bellard | if ( expDiff < 0 ) goto bExpBigger; |
2820 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2821 | 00406dff | bellard | if ( (bits64) ( ( aSig | bSig )<<1 ) ) { |
2822 | 00406dff | bellard | return propagateFloatx80NaN( a, b );
|
2823 | 00406dff | bellard | } |
2824 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2825 | 00406dff | bellard | z.low = floatx80_default_nan_low; |
2826 | 00406dff | bellard | z.high = floatx80_default_nan_high; |
2827 | 00406dff | bellard | return z;
|
2828 | 00406dff | bellard | } |
2829 | 00406dff | bellard | if ( aExp == 0 ) { |
2830 | 00406dff | bellard | aExp = 1;
|
2831 | 00406dff | bellard | bExp = 1;
|
2832 | 00406dff | bellard | } |
2833 | 00406dff | bellard | zSig1 = 0;
|
2834 | 00406dff | bellard | if ( bSig < aSig ) goto aBigger; |
2835 | 00406dff | bellard | if ( aSig < bSig ) goto bBigger; |
2836 | 00406dff | bellard | return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); |
2837 | 00406dff | bellard | bExpBigger:
|
2838 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
2839 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2840 | 00406dff | bellard | return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
2841 | 00406dff | bellard | } |
2842 | 00406dff | bellard | if ( aExp == 0 ) ++expDiff; |
2843 | 00406dff | bellard | shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
2844 | 00406dff | bellard | bBigger:
|
2845 | 00406dff | bellard | sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
|
2846 | 00406dff | bellard | zExp = bExp; |
2847 | 00406dff | bellard | zSign ^= 1;
|
2848 | 00406dff | bellard | goto normalizeRoundAndPack;
|
2849 | 00406dff | bellard | aExpBigger:
|
2850 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2851 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2852 | 00406dff | bellard | return a;
|
2853 | 00406dff | bellard | } |
2854 | 00406dff | bellard | if ( bExp == 0 ) --expDiff; |
2855 | 00406dff | bellard | shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
2856 | 00406dff | bellard | aBigger:
|
2857 | 00406dff | bellard | sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
|
2858 | 00406dff | bellard | zExp = aExp; |
2859 | 00406dff | bellard | normalizeRoundAndPack:
|
2860 | 00406dff | bellard | return
|
2861 | 00406dff | bellard | normalizeRoundAndPackFloatx80( |
2862 | 00406dff | bellard | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); |
2863 | 00406dff | bellard | |
2864 | 00406dff | bellard | } |
2865 | 00406dff | bellard | |
2866 | 00406dff | bellard | /*
|
2867 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2868 | 00406dff | bellard | Returns the result of adding the extended double-precision floating-point
|
2869 | 00406dff | bellard | values `a' and `b'. The operation is performed according to the IEC/IEEE
|
2870 | 00406dff | bellard | Standard for Binary Floating-point Arithmetic.
|
2871 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2872 | 00406dff | bellard | */
|
2873 | 00406dff | bellard | floatx80 floatx80_add( floatx80 a, floatx80 b ) |
2874 | 00406dff | bellard | { |
2875 | 00406dff | bellard | flag aSign, bSign; |
2876 | 00406dff | bellard | |
2877 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2878 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
2879 | 00406dff | bellard | if ( aSign == bSign ) {
|
2880 | 00406dff | bellard | return addFloatx80Sigs( a, b, aSign );
|
2881 | 00406dff | bellard | } |
2882 | 00406dff | bellard | else {
|
2883 | 00406dff | bellard | return subFloatx80Sigs( a, b, aSign );
|
2884 | 00406dff | bellard | } |
2885 | 00406dff | bellard | |
2886 | 00406dff | bellard | } |
2887 | 00406dff | bellard | |
2888 | 00406dff | bellard | /*
|
2889 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2890 | 00406dff | bellard | Returns the result of subtracting the extended double-precision floating-
|
2891 | 00406dff | bellard | point values `a' and `b'. The operation is performed according to the
|
2892 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2893 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2894 | 00406dff | bellard | */
|
2895 | 00406dff | bellard | floatx80 floatx80_sub( floatx80 a, floatx80 b ) |
2896 | 00406dff | bellard | { |
2897 | 00406dff | bellard | flag aSign, bSign; |
2898 | 00406dff | bellard | |
2899 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2900 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
2901 | 00406dff | bellard | if ( aSign == bSign ) {
|
2902 | 00406dff | bellard | return subFloatx80Sigs( a, b, aSign );
|
2903 | 00406dff | bellard | } |
2904 | 00406dff | bellard | else {
|
2905 | 00406dff | bellard | return addFloatx80Sigs( a, b, aSign );
|
2906 | 00406dff | bellard | } |
2907 | 00406dff | bellard | |
2908 | 00406dff | bellard | } |
2909 | 00406dff | bellard | |
2910 | 00406dff | bellard | /*
|
2911 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2912 | 00406dff | bellard | Returns the result of multiplying the extended double-precision floating-
|
2913 | 00406dff | bellard | point values `a' and `b'. The operation is performed according to the
|
2914 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2915 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2916 | 00406dff | bellard | */
|
2917 | 00406dff | bellard | floatx80 floatx80_mul( floatx80 a, floatx80 b ) |
2918 | 00406dff | bellard | { |
2919 | 00406dff | bellard | flag aSign, bSign, zSign; |
2920 | 00406dff | bellard | int32 aExp, bExp, zExp; |
2921 | 00406dff | bellard | bits64 aSig, bSig, zSig0, zSig1; |
2922 | 00406dff | bellard | floatx80 z; |
2923 | 00406dff | bellard | |
2924 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2925 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2926 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2927 | 00406dff | bellard | bSig = extractFloatx80Frac( b ); |
2928 | 00406dff | bellard | bExp = extractFloatx80Exp( b ); |
2929 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
2930 | 00406dff | bellard | zSign = aSign ^ bSign; |
2931 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2932 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) |
2933 | 00406dff | bellard | || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { |
2934 | 00406dff | bellard | return propagateFloatx80NaN( a, b );
|
2935 | 00406dff | bellard | } |
2936 | 00406dff | bellard | if ( ( bExp | bSig ) == 0 ) goto invalid; |
2937 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
2938 | 00406dff | bellard | } |
2939 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
2940 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2941 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
2942 | 00406dff | bellard | invalid:
|
2943 | 00406dff | bellard | float_raise( float_flag_invalid ); |
2944 | 00406dff | bellard | z.low = floatx80_default_nan_low; |
2945 | 00406dff | bellard | z.high = floatx80_default_nan_high; |
2946 | 00406dff | bellard | return z;
|
2947 | 00406dff | bellard | } |
2948 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
2949 | 00406dff | bellard | } |
2950 | 00406dff | bellard | if ( aExp == 0 ) { |
2951 | 00406dff | bellard | if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); |
2952 | 00406dff | bellard | normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); |
2953 | 00406dff | bellard | } |
2954 | 00406dff | bellard | if ( bExp == 0 ) { |
2955 | 00406dff | bellard | if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); |
2956 | 00406dff | bellard | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); |
2957 | 00406dff | bellard | } |
2958 | 00406dff | bellard | zExp = aExp + bExp - 0x3FFE;
|
2959 | 00406dff | bellard | mul64To128( aSig, bSig, &zSig0, &zSig1 ); |
2960 | 00406dff | bellard | if ( 0 < (sbits64) zSig0 ) { |
2961 | 00406dff | bellard | shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
|
2962 | 00406dff | bellard | --zExp; |
2963 | 00406dff | bellard | } |
2964 | 00406dff | bellard | return
|
2965 | 00406dff | bellard | roundAndPackFloatx80( |
2966 | 00406dff | bellard | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); |
2967 | 00406dff | bellard | |
2968 | 00406dff | bellard | } |
2969 | 00406dff | bellard | |
2970 | 00406dff | bellard | /*
|
2971 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2972 | 00406dff | bellard | Returns the result of dividing the extended double-precision floating-point
|
2973 | 00406dff | bellard | value `a' by the corresponding value `b'. The operation is performed
|
2974 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
2975 | 00406dff | bellard | -------------------------------------------------------------------------------
|
2976 | 00406dff | bellard | */
|
2977 | 00406dff | bellard | floatx80 floatx80_div( floatx80 a, floatx80 b ) |
2978 | 00406dff | bellard | { |
2979 | 00406dff | bellard | flag aSign, bSign, zSign; |
2980 | 00406dff | bellard | int32 aExp, bExp, zExp; |
2981 | 00406dff | bellard | bits64 aSig, bSig, zSig0, zSig1; |
2982 | 00406dff | bellard | bits64 rem0, rem1, rem2, term0, term1, term2; |
2983 | 00406dff | bellard | floatx80 z; |
2984 | 00406dff | bellard | |
2985 | 00406dff | bellard | aSig = extractFloatx80Frac( a ); |
2986 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
2987 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
2988 | 00406dff | bellard | bSig = extractFloatx80Frac( b ); |
2989 | 00406dff | bellard | bExp = extractFloatx80Exp( b ); |
2990 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
2991 | 00406dff | bellard | zSign = aSign ^ bSign; |
2992 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
2993 | 00406dff | bellard | if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2994 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
2995 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
2996 | 00406dff | bellard | goto invalid;
|
2997 | 00406dff | bellard | } |
2998 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
2999 | 00406dff | bellard | } |
3000 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
3001 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
3002 | 00406dff | bellard | return packFloatx80( zSign, 0, 0 ); |
3003 | 00406dff | bellard | } |
3004 | 00406dff | bellard | if ( bExp == 0 ) { |
3005 | 00406dff | bellard | if ( bSig == 0 ) { |
3006 | 00406dff | bellard | if ( ( aExp | aSig ) == 0 ) { |
3007 | 00406dff | bellard | invalid:
|
3008 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3009 | 00406dff | bellard | z.low = floatx80_default_nan_low; |
3010 | 00406dff | bellard | z.high = floatx80_default_nan_high; |
3011 | 00406dff | bellard | return z;
|
3012 | 00406dff | bellard | } |
3013 | 00406dff | bellard | float_raise( float_flag_divbyzero ); |
3014 | 00406dff | bellard | return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); |
3015 | 00406dff | bellard | } |
3016 | 00406dff | bellard | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); |
3017 | 00406dff | bellard | } |
3018 | 00406dff | bellard | if ( aExp == 0 ) { |
3019 | 00406dff | bellard | if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); |
3020 | 00406dff | bellard | normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); |
3021 | 00406dff | bellard | } |
3022 | 00406dff | bellard | zExp = aExp - bExp + 0x3FFE;
|
3023 | 00406dff | bellard | rem1 = 0;
|
3024 | 00406dff | bellard | if ( bSig <= aSig ) {
|
3025 | 00406dff | bellard | shift128Right( aSig, 0, 1, &aSig, &rem1 ); |
3026 | 00406dff | bellard | ++zExp; |
3027 | 00406dff | bellard | } |
3028 | 00406dff | bellard | zSig0 = estimateDiv128To64( aSig, rem1, bSig ); |
3029 | 00406dff | bellard | mul64To128( bSig, zSig0, &term0, &term1 ); |
3030 | 00406dff | bellard | sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); |
3031 | 00406dff | bellard | while ( (sbits64) rem0 < 0 ) { |
3032 | 00406dff | bellard | --zSig0; |
3033 | 00406dff | bellard | add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
|
3034 | 00406dff | bellard | } |
3035 | 00406dff | bellard | zSig1 = estimateDiv128To64( rem1, 0, bSig );
|
3036 | 00406dff | bellard | if ( (bits64) ( zSig1<<1 ) <= 8 ) { |
3037 | 00406dff | bellard | mul64To128( bSig, zSig1, &term1, &term2 ); |
3038 | 00406dff | bellard | sub128( rem1, 0, term1, term2, &rem1, &rem2 );
|
3039 | 00406dff | bellard | while ( (sbits64) rem1 < 0 ) { |
3040 | 00406dff | bellard | --zSig1; |
3041 | 00406dff | bellard | add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
|
3042 | 00406dff | bellard | } |
3043 | 00406dff | bellard | zSig1 |= ( ( rem1 | rem2 ) != 0 );
|
3044 | 00406dff | bellard | } |
3045 | 00406dff | bellard | return
|
3046 | 00406dff | bellard | roundAndPackFloatx80( |
3047 | 00406dff | bellard | floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); |
3048 | 00406dff | bellard | |
3049 | 00406dff | bellard | } |
3050 | 00406dff | bellard | |
3051 | 00406dff | bellard | /*
|
3052 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3053 | 00406dff | bellard | Returns the remainder of the extended double-precision floating-point value
|
3054 | 00406dff | bellard | `a' with respect to the corresponding value `b'. The operation is performed
|
3055 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
3056 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3057 | 00406dff | bellard | */
|
3058 | 00406dff | bellard | floatx80 floatx80_rem( floatx80 a, floatx80 b ) |
3059 | 00406dff | bellard | { |
3060 | 00406dff | bellard | flag aSign, bSign, zSign; |
3061 | 00406dff | bellard | int32 aExp, bExp, expDiff; |
3062 | 00406dff | bellard | bits64 aSig0, aSig1, bSig; |
3063 | 00406dff | bellard | bits64 q, term0, term1, alternateASig0, alternateASig1; |
3064 | 00406dff | bellard | floatx80 z; |
3065 | 00406dff | bellard | |
3066 | 00406dff | bellard | aSig0 = extractFloatx80Frac( a ); |
3067 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
3068 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3069 | 00406dff | bellard | bSig = extractFloatx80Frac( b ); |
3070 | 00406dff | bellard | bExp = extractFloatx80Exp( b ); |
3071 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
3072 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
3073 | 00406dff | bellard | if ( (bits64) ( aSig0<<1 ) |
3074 | 00406dff | bellard | || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { |
3075 | 00406dff | bellard | return propagateFloatx80NaN( a, b );
|
3076 | 00406dff | bellard | } |
3077 | 00406dff | bellard | goto invalid;
|
3078 | 00406dff | bellard | } |
3079 | 00406dff | bellard | if ( bExp == 0x7FFF ) { |
3080 | 00406dff | bellard | if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); |
3081 | 00406dff | bellard | return a;
|
3082 | 00406dff | bellard | } |
3083 | 00406dff | bellard | if ( bExp == 0 ) { |
3084 | 00406dff | bellard | if ( bSig == 0 ) { |
3085 | 00406dff | bellard | invalid:
|
3086 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3087 | 00406dff | bellard | z.low = floatx80_default_nan_low; |
3088 | 00406dff | bellard | z.high = floatx80_default_nan_high; |
3089 | 00406dff | bellard | return z;
|
3090 | 00406dff | bellard | } |
3091 | 00406dff | bellard | normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); |
3092 | 00406dff | bellard | } |
3093 | 00406dff | bellard | if ( aExp == 0 ) { |
3094 | 00406dff | bellard | if ( (bits64) ( aSig0<<1 ) == 0 ) return a; |
3095 | 00406dff | bellard | normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); |
3096 | 00406dff | bellard | } |
3097 | 00406dff | bellard | bSig |= LIT64( 0x8000000000000000 );
|
3098 | 00406dff | bellard | zSign = aSign; |
3099 | 00406dff | bellard | expDiff = aExp - bExp; |
3100 | 00406dff | bellard | aSig1 = 0;
|
3101 | 00406dff | bellard | if ( expDiff < 0 ) { |
3102 | 00406dff | bellard | if ( expDiff < -1 ) return a; |
3103 | 00406dff | bellard | shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); |
3104 | 00406dff | bellard | expDiff = 0;
|
3105 | 00406dff | bellard | } |
3106 | 00406dff | bellard | q = ( bSig <= aSig0 ); |
3107 | 00406dff | bellard | if ( q ) aSig0 -= bSig;
|
3108 | 00406dff | bellard | expDiff -= 64;
|
3109 | 00406dff | bellard | while ( 0 < expDiff ) { |
3110 | 00406dff | bellard | q = estimateDiv128To64( aSig0, aSig1, bSig ); |
3111 | 00406dff | bellard | q = ( 2 < q ) ? q - 2 : 0; |
3112 | 00406dff | bellard | mul64To128( bSig, q, &term0, &term1 ); |
3113 | 00406dff | bellard | sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); |
3114 | 00406dff | bellard | shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
|
3115 | 00406dff | bellard | expDiff -= 62;
|
3116 | 00406dff | bellard | } |
3117 | 00406dff | bellard | expDiff += 64;
|
3118 | 00406dff | bellard | if ( 0 < expDiff ) { |
3119 | 00406dff | bellard | q = estimateDiv128To64( aSig0, aSig1, bSig ); |
3120 | 00406dff | bellard | q = ( 2 < q ) ? q - 2 : 0; |
3121 | 00406dff | bellard | q >>= 64 - expDiff;
|
3122 | 00406dff | bellard | mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
|
3123 | 00406dff | bellard | sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); |
3124 | 00406dff | bellard | shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); |
3125 | 00406dff | bellard | while ( le128( term0, term1, aSig0, aSig1 ) ) {
|
3126 | 00406dff | bellard | ++q; |
3127 | 00406dff | bellard | sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); |
3128 | 00406dff | bellard | } |
3129 | 00406dff | bellard | } |
3130 | 00406dff | bellard | else {
|
3131 | 00406dff | bellard | term1 = 0;
|
3132 | 00406dff | bellard | term0 = bSig; |
3133 | 00406dff | bellard | } |
3134 | 00406dff | bellard | sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); |
3135 | 00406dff | bellard | if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
|
3136 | 00406dff | bellard | || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) |
3137 | 00406dff | bellard | && ( q & 1 ) )
|
3138 | 00406dff | bellard | ) { |
3139 | 00406dff | bellard | aSig0 = alternateASig0; |
3140 | 00406dff | bellard | aSig1 = alternateASig1; |
3141 | 00406dff | bellard | zSign = ! zSign; |
3142 | 00406dff | bellard | } |
3143 | 00406dff | bellard | return
|
3144 | 00406dff | bellard | normalizeRoundAndPackFloatx80( |
3145 | 00406dff | bellard | 80, zSign, bExp + expDiff, aSig0, aSig1 );
|
3146 | 00406dff | bellard | |
3147 | 00406dff | bellard | } |
3148 | 00406dff | bellard | |
3149 | 00406dff | bellard | /*
|
3150 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3151 | 00406dff | bellard | Returns the square root of the extended double-precision floating-point
|
3152 | 00406dff | bellard | value `a'. The operation is performed according to the IEC/IEEE Standard
|
3153 | 00406dff | bellard | for Binary Floating-point Arithmetic.
|
3154 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3155 | 00406dff | bellard | */
|
3156 | 00406dff | bellard | floatx80 floatx80_sqrt( floatx80 a ) |
3157 | 00406dff | bellard | { |
3158 | 00406dff | bellard | flag aSign; |
3159 | 00406dff | bellard | int32 aExp, zExp; |
3160 | 00406dff | bellard | bits64 aSig0, aSig1, zSig0, zSig1; |
3161 | 00406dff | bellard | bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; |
3162 | 00406dff | bellard | bits64 shiftedRem0, shiftedRem1; |
3163 | 00406dff | bellard | floatx80 z; |
3164 | 00406dff | bellard | |
3165 | 00406dff | bellard | aSig0 = extractFloatx80Frac( a ); |
3166 | 00406dff | bellard | aExp = extractFloatx80Exp( a ); |
3167 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3168 | 00406dff | bellard | if ( aExp == 0x7FFF ) { |
3169 | 00406dff | bellard | if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); |
3170 | 00406dff | bellard | if ( ! aSign ) return a; |
3171 | 00406dff | bellard | goto invalid;
|
3172 | 00406dff | bellard | } |
3173 | 00406dff | bellard | if ( aSign ) {
|
3174 | 00406dff | bellard | if ( ( aExp | aSig0 ) == 0 ) return a; |
3175 | 00406dff | bellard | invalid:
|
3176 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3177 | 00406dff | bellard | z.low = floatx80_default_nan_low; |
3178 | 00406dff | bellard | z.high = floatx80_default_nan_high; |
3179 | 00406dff | bellard | return z;
|
3180 | 00406dff | bellard | } |
3181 | 00406dff | bellard | if ( aExp == 0 ) { |
3182 | 00406dff | bellard | if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); |
3183 | 00406dff | bellard | normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); |
3184 | 00406dff | bellard | } |
3185 | 00406dff | bellard | zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; |
3186 | 00406dff | bellard | zSig0 = estimateSqrt32( aExp, aSig0>>32 );
|
3187 | 00406dff | bellard | zSig0 <<= 31;
|
3188 | 00406dff | bellard | aSig1 = 0;
|
3189 | 00406dff | bellard | shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 ); |
3190 | 00406dff | bellard | zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
|
3191 | 00406dff | bellard | if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); |
3192 | 00406dff | bellard | shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
|
3193 | 00406dff | bellard | mul64To128( zSig0, zSig0, &term0, &term1 ); |
3194 | 00406dff | bellard | sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); |
3195 | 00406dff | bellard | while ( (sbits64) rem0 < 0 ) { |
3196 | 00406dff | bellard | --zSig0; |
3197 | 00406dff | bellard | shortShift128Left( 0, zSig0, 1, &term0, &term1 ); |
3198 | 00406dff | bellard | term1 |= 1;
|
3199 | 00406dff | bellard | add128( rem0, rem1, term0, term1, &rem0, &rem1 ); |
3200 | 00406dff | bellard | } |
3201 | 00406dff | bellard | shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
|
3202 | 00406dff | bellard | zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); |
3203 | 00406dff | bellard | if ( (bits64) ( zSig1<<1 ) <= 10 ) { |
3204 | 00406dff | bellard | if ( zSig1 == 0 ) zSig1 = 1; |
3205 | 00406dff | bellard | mul64To128( zSig0, zSig1, &term1, &term2 ); |
3206 | 00406dff | bellard | shortShift128Left( term1, term2, 1, &term1, &term2 );
|
3207 | 00406dff | bellard | sub128( rem1, 0, term1, term2, &rem1, &rem2 );
|
3208 | 00406dff | bellard | mul64To128( zSig1, zSig1, &term2, &term3 ); |
3209 | 00406dff | bellard | sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); |
3210 | 00406dff | bellard | while ( (sbits64) rem1 < 0 ) { |
3211 | 00406dff | bellard | --zSig1; |
3212 | 00406dff | bellard | shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); |
3213 | 00406dff | bellard | term3 |= 1;
|
3214 | 00406dff | bellard | add192( |
3215 | 00406dff | bellard | rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); |
3216 | 00406dff | bellard | } |
3217 | 00406dff | bellard | zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
|
3218 | 00406dff | bellard | } |
3219 | 00406dff | bellard | return
|
3220 | 00406dff | bellard | roundAndPackFloatx80( |
3221 | 00406dff | bellard | floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
|
3222 | 00406dff | bellard | |
3223 | 00406dff | bellard | } |
3224 | 00406dff | bellard | |
3225 | 00406dff | bellard | /*
|
3226 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3227 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is
|
3228 | 00406dff | bellard | equal to the corresponding value `b', and 0 otherwise. The comparison is
|
3229 | 00406dff | bellard | performed according to the IEC/IEEE Standard for Binary Floating-point
|
3230 | 00406dff | bellard | Arithmetic.
|
3231 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3232 | 00406dff | bellard | */
|
3233 | 00406dff | bellard | flag floatx80_eq( floatx80 a, floatx80 b ) |
3234 | 00406dff | bellard | { |
3235 | 00406dff | bellard | |
3236 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3237 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3238 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3239 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3240 | 00406dff | bellard | ) { |
3241 | 00406dff | bellard | if ( floatx80_is_signaling_nan( a )
|
3242 | 00406dff | bellard | || floatx80_is_signaling_nan( b ) ) { |
3243 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3244 | 00406dff | bellard | } |
3245 | 00406dff | bellard | return 0; |
3246 | 00406dff | bellard | } |
3247 | 00406dff | bellard | return
|
3248 | 00406dff | bellard | ( a.low == b.low ) |
3249 | 00406dff | bellard | && ( ( a.high == b.high ) |
3250 | 00406dff | bellard | || ( ( a.low == 0 )
|
3251 | 00406dff | bellard | && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) |
3252 | 00406dff | bellard | ); |
3253 | 00406dff | bellard | |
3254 | 00406dff | bellard | } |
3255 | 00406dff | bellard | |
3256 | 00406dff | bellard | /*
|
3257 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3258 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is
|
3259 | 00406dff | bellard | less than or equal to the corresponding value `b', and 0 otherwise. The
|
3260 | 00406dff | bellard | comparison is performed according to the IEC/IEEE Standard for Binary
|
3261 | 00406dff | bellard | Floating-point Arithmetic.
|
3262 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3263 | 00406dff | bellard | */
|
3264 | 00406dff | bellard | flag floatx80_le( floatx80 a, floatx80 b ) |
3265 | 00406dff | bellard | { |
3266 | 00406dff | bellard | flag aSign, bSign; |
3267 | 00406dff | bellard | |
3268 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3269 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3270 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3271 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3272 | 00406dff | bellard | ) { |
3273 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3274 | 00406dff | bellard | return 0; |
3275 | 00406dff | bellard | } |
3276 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3277 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
3278 | 00406dff | bellard | if ( aSign != bSign ) {
|
3279 | 00406dff | bellard | return
|
3280 | 00406dff | bellard | aSign |
3281 | 00406dff | bellard | || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
|
3282 | 00406dff | bellard | == 0 );
|
3283 | 00406dff | bellard | } |
3284 | 00406dff | bellard | return
|
3285 | 00406dff | bellard | aSign ? le128( b.high, b.low, a.high, a.low ) |
3286 | 00406dff | bellard | : le128( a.high, a.low, b.high, b.low ); |
3287 | 00406dff | bellard | |
3288 | 00406dff | bellard | } |
3289 | 00406dff | bellard | |
3290 | 00406dff | bellard | /*
|
3291 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3292 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is
|
3293 | 00406dff | bellard | less than the corresponding value `b', and 0 otherwise. The comparison
|
3294 | 00406dff | bellard | is performed according to the IEC/IEEE Standard for Binary Floating-point
|
3295 | 00406dff | bellard | Arithmetic.
|
3296 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3297 | 00406dff | bellard | */
|
3298 | 00406dff | bellard | flag floatx80_lt( floatx80 a, floatx80 b ) |
3299 | 00406dff | bellard | { |
3300 | 00406dff | bellard | flag aSign, bSign; |
3301 | 00406dff | bellard | |
3302 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3303 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3304 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3305 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3306 | 00406dff | bellard | ) { |
3307 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3308 | 00406dff | bellard | return 0; |
3309 | 00406dff | bellard | } |
3310 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3311 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
3312 | 00406dff | bellard | if ( aSign != bSign ) {
|
3313 | 00406dff | bellard | return
|
3314 | 00406dff | bellard | aSign |
3315 | 00406dff | bellard | && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
|
3316 | 00406dff | bellard | != 0 );
|
3317 | 00406dff | bellard | } |
3318 | 00406dff | bellard | return
|
3319 | 00406dff | bellard | aSign ? lt128( b.high, b.low, a.high, a.low ) |
3320 | 00406dff | bellard | : lt128( a.high, a.low, b.high, b.low ); |
3321 | 00406dff | bellard | |
3322 | 00406dff | bellard | } |
3323 | 00406dff | bellard | |
3324 | 00406dff | bellard | /*
|
3325 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3326 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is equal
|
3327 | 00406dff | bellard | to the corresponding value `b', and 0 otherwise. The invalid exception is
|
3328 | 00406dff | bellard | raised if either operand is a NaN. Otherwise, the comparison is performed
|
3329 | 00406dff | bellard | according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
3330 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3331 | 00406dff | bellard | */
|
3332 | 00406dff | bellard | flag floatx80_eq_signaling( floatx80 a, floatx80 b ) |
3333 | 00406dff | bellard | { |
3334 | 00406dff | bellard | |
3335 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3336 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3337 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3338 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3339 | 00406dff | bellard | ) { |
3340 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3341 | 00406dff | bellard | return 0; |
3342 | 00406dff | bellard | } |
3343 | 00406dff | bellard | return
|
3344 | 00406dff | bellard | ( a.low == b.low ) |
3345 | 00406dff | bellard | && ( ( a.high == b.high ) |
3346 | 00406dff | bellard | || ( ( a.low == 0 )
|
3347 | 00406dff | bellard | && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) |
3348 | 00406dff | bellard | ); |
3349 | 00406dff | bellard | |
3350 | 00406dff | bellard | } |
3351 | 00406dff | bellard | |
3352 | 00406dff | bellard | /*
|
3353 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3354 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is less
|
3355 | 00406dff | bellard | than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
|
3356 | 00406dff | bellard | do not cause an exception. Otherwise, the comparison is performed according
|
3357 | 00406dff | bellard | to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
3358 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3359 | 00406dff | bellard | */
|
3360 | 00406dff | bellard | flag floatx80_le_quiet( floatx80 a, floatx80 b ) |
3361 | 00406dff | bellard | { |
3362 | 00406dff | bellard | flag aSign, bSign; |
3363 | 00406dff | bellard | |
3364 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3365 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3366 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3367 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3368 | 00406dff | bellard | ) { |
3369 | 00406dff | bellard | if ( floatx80_is_signaling_nan( a )
|
3370 | 00406dff | bellard | || floatx80_is_signaling_nan( b ) ) { |
3371 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3372 | 00406dff | bellard | } |
3373 | 00406dff | bellard | return 0; |
3374 | 00406dff | bellard | } |
3375 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3376 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
3377 | 00406dff | bellard | if ( aSign != bSign ) {
|
3378 | 00406dff | bellard | return
|
3379 | 00406dff | bellard | aSign |
3380 | 00406dff | bellard | || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
|
3381 | 00406dff | bellard | == 0 );
|
3382 | 00406dff | bellard | } |
3383 | 00406dff | bellard | return
|
3384 | 00406dff | bellard | aSign ? le128( b.high, b.low, a.high, a.low ) |
3385 | 00406dff | bellard | : le128( a.high, a.low, b.high, b.low ); |
3386 | 00406dff | bellard | |
3387 | 00406dff | bellard | } |
3388 | 00406dff | bellard | |
3389 | 00406dff | bellard | /*
|
3390 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3391 | 00406dff | bellard | Returns 1 if the extended double-precision floating-point value `a' is less
|
3392 | 00406dff | bellard | than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
|
3393 | 00406dff | bellard | an exception. Otherwise, the comparison is performed according to the
|
3394 | 00406dff | bellard | IEC/IEEE Standard for Binary Floating-point Arithmetic.
|
3395 | 00406dff | bellard | -------------------------------------------------------------------------------
|
3396 | 00406dff | bellard | */
|
3397 | 00406dff | bellard | flag floatx80_lt_quiet( floatx80 a, floatx80 b ) |
3398 | 00406dff | bellard | { |
3399 | 00406dff | bellard | flag aSign, bSign; |
3400 | 00406dff | bellard | |
3401 | 00406dff | bellard | if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) |
3402 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( a )<<1 ) )
|
3403 | 00406dff | bellard | || ( ( extractFloatx80Exp( b ) == 0x7FFF )
|
3404 | 00406dff | bellard | && (bits64) ( extractFloatx80Frac( b )<<1 ) )
|
3405 | 00406dff | bellard | ) { |
3406 | 00406dff | bellard | if ( floatx80_is_signaling_nan( a )
|
3407 | 00406dff | bellard | || floatx80_is_signaling_nan( b ) ) { |
3408 | 00406dff | bellard | float_raise( float_flag_invalid ); |
3409 | 00406dff | bellard | } |
3410 | 00406dff | bellard | return 0; |
3411 | 00406dff | bellard | } |
3412 | 00406dff | bellard | aSign = extractFloatx80Sign( a ); |
3413 | 00406dff | bellard | bSign = extractFloatx80Sign( b ); |
3414 | 00406dff | bellard | if ( aSign != bSign ) {
|
3415 | 00406dff | bellard | return
|
3416 | 00406dff | bellard | aSign |
3417 | 00406dff | bellard | && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
|
3418 | 00406dff | bellard | != 0 );
|
3419 | 00406dff | bellard | } |
3420 | 00406dff | bellard | return
|
3421 | 00406dff | bellard | aSign ? lt128( b.high, b.low, a.high, a.low ) |
3422 | 00406dff | bellard | : lt128( a.high, a.low, b.high, b.low ); |
3423 | 00406dff | bellard | |
3424 | 00406dff | bellard | } |
3425 | 00406dff | bellard | |
3426 | 00406dff | bellard | #endif
|