/fpu/softfloat.c - Annotate - qemu - Greek Research and Technology Network's projects

root / fpu / softfloat.c @ a74cdab4

History | View | Annotate | Download (228.1 kB)

1	8d725fac	Andreas Färber	/*
2	8d725fac	Andreas Färber	* QEMU float support
3	8d725fac	Andreas Färber	*
4	8d725fac	Andreas Färber	* Derived from SoftFloat.
5	8d725fac	Andreas Färber	*/
6	158142c2	bellard
7	158142c2	bellard	/*============================================================================
8	158142c2	bellard
9	158142c2	bellard	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10	158142c2	bellard	Package, Release 2b.
11	158142c2	bellard
12	158142c2	bellard	Written by John R. Hauser. This work was made possible in part by the
13	158142c2	bellard	International Computer Science Institute, located at Suite 600, 1947 Center
14	158142c2	bellard	Street, Berkeley, California 94704. Funding was partially provided by the
15	158142c2	bellard	National Science Foundation under grant MIP-9311980. The original version
16	158142c2	bellard	of this code was written as part of a project to build a fixed-point vector
17	158142c2	bellard	processor in collaboration with the University of California at Berkeley,
18	158142c2	bellard	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19	158142c2	bellard	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20	158142c2	bellard	arithmetic/SoftFloat.html'.
21	158142c2	bellard
22	158142c2	bellard	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23	158142c2	bellard	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24	158142c2	bellard	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25	158142c2	bellard	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26	158142c2	bellard	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27	158142c2	bellard	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28	158142c2	bellard	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29	158142c2	bellard	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30	158142c2	bellard
31	158142c2	bellard	Derivative works are acceptable, even for commercial purposes, so long as
32	158142c2	bellard	(1) the source code for the derivative work includes prominent notice that
33	158142c2	bellard	the work is derivative, and (2) the source code includes prominent notice with
34	158142c2	bellard	these four paragraphs for those parts of this code that are retained.
35	158142c2	bellard
36	158142c2	bellard	=============================================================================*/
37	158142c2	bellard
38	158142c2	bellard	#include "softfloat.h"
39	158142c2	bellard
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
45	158142c2	bellard	#include "softfloat-macros.h"
46	158142c2	bellard
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
55	158142c2	bellard	#include "softfloat-specialize.h"
56	158142c2	bellard
57	158142c2	bellard	void set_float_rounding_mode(int val STATUS_PARAM)
58	158142c2	bellard	{
59	158142c2	bellard	STATUS(float_rounding_mode) = val;
60	158142c2	bellard	}
61	158142c2	bellard
62	1d6bda35	bellard	void set_float_exception_flags(int val STATUS_PARAM)
63	1d6bda35	bellard	{
64	1d6bda35	bellard	STATUS(float_exception_flags) = val;
65	1d6bda35	bellard	}
66	1d6bda35	bellard
#ifdef FLOATX80
/*----------------------------------------------------------------------------
| Stores `val' in the `floatx80_rounding_precision' field of the
| floating-point status.  Only compiled when FLOATX80 is defined.
*----------------------------------------------------------------------------*/

void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
#endif
73	158142c2	bellard
74	158142c2	bellard	/*----------------------------------------------------------------------------
75	bb4d4bb3	Peter Maydell	\| Returns the fraction bits of the half-precision floating-point value `a'.
76	bb4d4bb3	Peter Maydell	----------------------------------------------------------------------------/
77	bb4d4bb3	Peter Maydell
78	bb4d4bb3	Peter Maydell	INLINE uint32_t extractFloat16Frac(float16 a)
79	bb4d4bb3	Peter Maydell	{
80	bb4d4bb3	Peter Maydell	return float16_val(a) & 0x3ff;
81	bb4d4bb3	Peter Maydell	}
82	bb4d4bb3	Peter Maydell
83	bb4d4bb3	Peter Maydell	/*----------------------------------------------------------------------------
84	bb4d4bb3	Peter Maydell	\| Returns the exponent bits of the half-precision floating-point value `a'.
85	bb4d4bb3	Peter Maydell	----------------------------------------------------------------------------/
86	bb4d4bb3	Peter Maydell
87	bb4d4bb3	Peter Maydell	INLINE int16 extractFloat16Exp(float16 a)
88	bb4d4bb3	Peter Maydell	{
89	bb4d4bb3	Peter Maydell	return (float16_val(a) >> 10) & 0x1f;
90	bb4d4bb3	Peter Maydell	}
91	bb4d4bb3	Peter Maydell
92	bb4d4bb3	Peter Maydell	/*----------------------------------------------------------------------------
93	bb4d4bb3	Peter Maydell	\| Returns the sign bit of the single-precision floating-point value `a'.
94	bb4d4bb3	Peter Maydell	----------------------------------------------------------------------------/
95	bb4d4bb3	Peter Maydell
96	bb4d4bb3	Peter Maydell	INLINE flag extractFloat16Sign(float16 a)
97	bb4d4bb3	Peter Maydell	{
98	bb4d4bb3	Peter Maydell	return float16_val(a)>>15;
99	bb4d4bb3	Peter Maydell	}
100	bb4d4bb3	Peter Maydell
101	bb4d4bb3	Peter Maydell	/*----------------------------------------------------------------------------
102	158142c2	bellard	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
103	158142c2	bellard	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
104	158142c2	bellard	\| input. If `zSign' is 1, the input is negated before being converted to an
105	158142c2	bellard	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
106	158142c2	bellard	\| is simply rounded to an integer, with the inexact exception raised if the
107	158142c2	bellard	\| input cannot be represented exactly as an integer. However, if the fixed-
108	158142c2	bellard	\| point input is too large, the invalid exception is raised and the largest
109	158142c2	bellard	\| positive or negative integer is returned.
110	158142c2	bellard	----------------------------------------------------------------------------/
111	158142c2	bellard
112	bb98fe42	Andreas Färber	static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
113	158142c2	bellard	{
114	158142c2	bellard	int8 roundingMode;
115	158142c2	bellard	flag roundNearestEven;
116	158142c2	bellard	int8 roundIncrement, roundBits;
117	158142c2	bellard	int32 z;
118	158142c2	bellard
119	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
120	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
121	158142c2	bellard	roundIncrement = 0x40;
122	158142c2	bellard	if ( ! roundNearestEven ) {
123	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
124	158142c2	bellard	roundIncrement = 0;
125	158142c2	bellard	}
126	158142c2	bellard	else {
127	158142c2	bellard	roundIncrement = 0x7F;
128	158142c2	bellard	if ( zSign ) {
129	158142c2	bellard	if ( roundingMode == float_round_up ) roundIncrement = 0;
130	158142c2	bellard	}
131	158142c2	bellard	else {
132	158142c2	bellard	if ( roundingMode == float_round_down ) roundIncrement = 0;
133	158142c2	bellard	}
134	158142c2	bellard	}
135	158142c2	bellard	}
136	158142c2	bellard	roundBits = absZ & 0x7F;
137	158142c2	bellard	absZ = ( absZ + roundIncrement )>>7;
138	158142c2	bellard	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
139	158142c2	bellard	z = absZ;
140	158142c2	bellard	if ( zSign ) z = - z;
141	158142c2	bellard	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
142	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
143	bb98fe42	Andreas Färber	return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
144	158142c2	bellard	}
145	158142c2	bellard	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
146	158142c2	bellard	return z;
147	158142c2	bellard
148	158142c2	bellard	}
149	158142c2	bellard
150	158142c2	bellard	/*----------------------------------------------------------------------------
151	158142c2	bellard	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
152	158142c2	bellard	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
153	158142c2	bellard	\| and returns the properly rounded 64-bit integer corresponding to the input.
154	158142c2	bellard	\| If `zSign' is 1, the input is negated before being converted to an integer.
155	158142c2	bellard	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
156	158142c2	bellard	\| the inexact exception raised if the input cannot be represented exactly as
157	158142c2	bellard	\| an integer. However, if the fixed-point input is too large, the invalid
158	158142c2	bellard	\| exception is raised and the largest positive or negative integer is
159	158142c2	bellard	\| returned.
160	158142c2	bellard	----------------------------------------------------------------------------/
161	158142c2	bellard
162	bb98fe42	Andreas Färber	static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
163	158142c2	bellard	{
164	158142c2	bellard	int8 roundingMode;
165	158142c2	bellard	flag roundNearestEven, increment;
166	158142c2	bellard	int64 z;
167	158142c2	bellard
168	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
169	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
170	bb98fe42	Andreas Färber	increment = ( (int64_t) absZ1 < 0 );
171	158142c2	bellard	if ( ! roundNearestEven ) {
172	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
173	158142c2	bellard	increment = 0;
174	158142c2	bellard	}
175	158142c2	bellard	else {
176	158142c2	bellard	if ( zSign ) {
177	158142c2	bellard	increment = ( roundingMode == float_round_down ) && absZ1;
178	158142c2	bellard	}
179	158142c2	bellard	else {
180	158142c2	bellard	increment = ( roundingMode == float_round_up ) && absZ1;
181	158142c2	bellard	}
182	158142c2	bellard	}
183	158142c2	bellard	}
184	158142c2	bellard	if ( increment ) {
185	158142c2	bellard	++absZ0;
186	158142c2	bellard	if ( absZ0 == 0 ) goto overflow;
187	bb98fe42	Andreas Färber	absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
188	158142c2	bellard	}
189	158142c2	bellard	z = absZ0;
190	158142c2	bellard	if ( zSign ) z = - z;
191	158142c2	bellard	if ( z && ( ( z < 0 ) ^ zSign ) ) {
192	158142c2	bellard	overflow:
193	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
194	158142c2	bellard	return
195	bb98fe42	Andreas Färber	zSign ? (int64_t) LIT64( 0x8000000000000000 )
196	158142c2	bellard	: LIT64( 0x7FFFFFFFFFFFFFFF );
197	158142c2	bellard	}
198	158142c2	bellard	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
199	158142c2	bellard	return z;
200	158142c2	bellard
201	158142c2	bellard	}
202	158142c2	bellard
203	158142c2	bellard	/*----------------------------------------------------------------------------
204	158142c2	bellard	\| Returns the fraction bits of the single-precision floating-point value `a'.
205	158142c2	bellard	----------------------------------------------------------------------------/
206	158142c2	bellard
207	bb98fe42	Andreas Färber	INLINE uint32_t extractFloat32Frac( float32 a )
208	158142c2	bellard	{
209	158142c2	bellard
210	f090c9d4	pbrook	return float32_val(a) & 0x007FFFFF;
211	158142c2	bellard
212	158142c2	bellard	}
213	158142c2	bellard
214	158142c2	bellard	/*----------------------------------------------------------------------------
215	158142c2	bellard	\| Returns the exponent bits of the single-precision floating-point value `a'.
216	158142c2	bellard	----------------------------------------------------------------------------/
217	158142c2	bellard
218	158142c2	bellard	INLINE int16 extractFloat32Exp( float32 a )
219	158142c2	bellard	{
220	158142c2	bellard
221	f090c9d4	pbrook	return ( float32_val(a)>>23 ) & 0xFF;
222	158142c2	bellard
223	158142c2	bellard	}
224	158142c2	bellard
225	158142c2	bellard	/*----------------------------------------------------------------------------
226	158142c2	bellard	\| Returns the sign bit of the single-precision floating-point value `a'.
227	158142c2	bellard	----------------------------------------------------------------------------/
228	158142c2	bellard
229	158142c2	bellard	INLINE flag extractFloat32Sign( float32 a )
230	158142c2	bellard	{
231	158142c2	bellard
232	f090c9d4	pbrook	return float32_val(a)>>31;
233	158142c2	bellard
234	158142c2	bellard	}
235	158142c2	bellard
236	158142c2	bellard	/*----------------------------------------------------------------------------
237	37d18660	Peter Maydell	\| If `a' is denormal and we are in flush-to-zero mode then set the
238	37d18660	Peter Maydell	\| input-denormal exception and return zero. Otherwise just return the value.
239	37d18660	Peter Maydell	----------------------------------------------------------------------------/
240	37d18660	Peter Maydell	static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
241	37d18660	Peter Maydell	{
242	37d18660	Peter Maydell	if (STATUS(flush_inputs_to_zero)) {
243	37d18660	Peter Maydell	if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
244	37d18660	Peter Maydell	float_raise(float_flag_input_denormal STATUS_VAR);
245	37d18660	Peter Maydell	return make_float32(float32_val(a) & 0x80000000);
246	37d18660	Peter Maydell	}
247	37d18660	Peter Maydell	}
248	37d18660	Peter Maydell	return a;
249	37d18660	Peter Maydell	}
250	37d18660	Peter Maydell
251	37d18660	Peter Maydell	/*----------------------------------------------------------------------------
252	158142c2	bellard	\| Normalizes the subnormal single-precision floating-point value represented
253	158142c2	bellard	\| by the denormalized significand `aSig'. The normalized exponent and
254	158142c2	bellard	\| significand are stored at the locations pointed to by `zExpPtr' and
255	158142c2	bellard	\| `zSigPtr', respectively.
256	158142c2	bellard	----------------------------------------------------------------------------/
257	158142c2	bellard
258	158142c2	bellard	static void
259	bb98fe42	Andreas Färber	normalizeFloat32Subnormal( uint32_t aSig, int16 zExpPtr, uint32_t zSigPtr )
260	158142c2	bellard	{
261	158142c2	bellard	int8 shiftCount;
262	158142c2	bellard
263	158142c2	bellard	shiftCount = countLeadingZeros32( aSig ) - 8;
264	158142c2	bellard	*zSigPtr = aSig<<shiftCount;
265	158142c2	bellard	*zExpPtr = 1 - shiftCount;
266	158142c2	bellard
267	158142c2	bellard	}
268	158142c2	bellard
269	158142c2	bellard	/*----------------------------------------------------------------------------
270	158142c2	bellard	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
271	158142c2	bellard	\| single-precision floating-point value, returning the result. After being
272	158142c2	bellard	\| shifted into the proper positions, the three fields are simply added
273	158142c2	bellard	\| together to form the result. This means that any integer portion of `zSig'
274	158142c2	bellard	\| will be added into the exponent. Since a properly normalized significand
275	158142c2	bellard	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
276	158142c2	bellard	\| than the desired result exponent whenever `zSig' is a complete, normalized
277	158142c2	bellard	\| significand.
278	158142c2	bellard	----------------------------------------------------------------------------/
279	158142c2	bellard
280	bb98fe42	Andreas Färber	INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
281	158142c2	bellard	{
282	158142c2	bellard
283	f090c9d4	pbrook	return make_float32(
284	bb98fe42	Andreas Färber	( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
285	158142c2	bellard
286	158142c2	bellard	}
287	158142c2	bellard
288	158142c2	bellard	/*----------------------------------------------------------------------------
289	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
290	158142c2	bellard	\| and significand `zSig', and returns the proper single-precision floating-
291	158142c2	bellard	\| point value corresponding to the abstract input. Ordinarily, the abstract
292	158142c2	bellard	\| value is simply rounded and packed into the single-precision format, with
293	158142c2	bellard	\| the inexact exception raised if the abstract input cannot be represented
294	158142c2	bellard	\| exactly. However, if the abstract value is too large, the overflow and
295	158142c2	bellard	\| inexact exceptions are raised and an infinity or maximal finite value is
296	158142c2	bellard	\| returned. If the abstract value is too small, the input value is rounded to
297	158142c2	bellard	\| a subnormal number, and the underflow and inexact exceptions are raised if
298	158142c2	bellard	\| the abstract input cannot be represented exactly as a subnormal single-
299	158142c2	bellard	\| precision floating-point number.
300	158142c2	bellard	\| The input significand `zSig' has its binary point between bits 30
301	158142c2	bellard	\| and 29, which is 7 bits to the left of the usual location. This shifted
302	158142c2	bellard	\| significand must be normalized or smaller. If `zSig' is not normalized,
303	158142c2	bellard	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
304	158142c2	bellard	\| and it must not require rounding. In the usual case that `zSig' is
305	158142c2	bellard	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
306	158142c2	bellard	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
307	158142c2	bellard	\| Binary Floating-Point Arithmetic.
308	158142c2	bellard	----------------------------------------------------------------------------/
309	158142c2	bellard
310	bb98fe42	Andreas Färber	static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
311	158142c2	bellard	{
312	158142c2	bellard	int8 roundingMode;
313	158142c2	bellard	flag roundNearestEven;
314	158142c2	bellard	int8 roundIncrement, roundBits;
315	158142c2	bellard	flag isTiny;
316	158142c2	bellard
317	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
318	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
319	158142c2	bellard	roundIncrement = 0x40;
320	158142c2	bellard	if ( ! roundNearestEven ) {
321	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
322	158142c2	bellard	roundIncrement = 0;
323	158142c2	bellard	}
324	158142c2	bellard	else {
325	158142c2	bellard	roundIncrement = 0x7F;
326	158142c2	bellard	if ( zSign ) {
327	158142c2	bellard	if ( roundingMode == float_round_up ) roundIncrement = 0;
328	158142c2	bellard	}
329	158142c2	bellard	else {
330	158142c2	bellard	if ( roundingMode == float_round_down ) roundIncrement = 0;
331	158142c2	bellard	}
332	158142c2	bellard	}
333	158142c2	bellard	}
334	158142c2	bellard	roundBits = zSig & 0x7F;
335	bb98fe42	Andreas Färber	if ( 0xFD <= (uint16_t) zExp ) {
336	158142c2	bellard	if ( ( 0xFD < zExp )
337	158142c2	bellard	\|\| ( ( zExp == 0xFD )
338	bb98fe42	Andreas Färber	&& ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
339	158142c2	bellard	) {
340	158142c2	bellard	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
341	f090c9d4	pbrook	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
342	158142c2	bellard	}
343	158142c2	bellard	if ( zExp < 0 ) {
344	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
345	158142c2	bellard	isTiny =
346	158142c2	bellard	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
347	158142c2	bellard	\|\| ( zExp < -1 )
348	158142c2	bellard	\|\| ( zSig + roundIncrement < 0x80000000 );
349	158142c2	bellard	shift32RightJamming( zSig, - zExp, &zSig );
350	158142c2	bellard	zExp = 0;
351	158142c2	bellard	roundBits = zSig & 0x7F;
352	158142c2	bellard	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
353	158142c2	bellard	}
354	158142c2	bellard	}
355	158142c2	bellard	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
356	158142c2	bellard	zSig = ( zSig + roundIncrement )>>7;
357	158142c2	bellard	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
358	158142c2	bellard	if ( zSig == 0 ) zExp = 0;
359	158142c2	bellard	return packFloat32( zSign, zExp, zSig );
360	158142c2	bellard
361	158142c2	bellard	}
362	158142c2	bellard
363	158142c2	bellard	/*----------------------------------------------------------------------------
364	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
365	158142c2	bellard	\| and significand `zSig', and returns the proper single-precision floating-
366	158142c2	bellard	\| point value corresponding to the abstract input. This routine is just like
367	158142c2	bellard	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
368	158142c2	bellard	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
369	158142c2	bellard	\| floating-point exponent.
370	158142c2	bellard	----------------------------------------------------------------------------/
371	158142c2	bellard
372	158142c2	bellard	static float32
373	bb98fe42	Andreas Färber	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
374	158142c2	bellard	{
375	158142c2	bellard	int8 shiftCount;
376	158142c2	bellard
377	158142c2	bellard	shiftCount = countLeadingZeros32( zSig ) - 1;
378	158142c2	bellard	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
379	158142c2	bellard
380	158142c2	bellard	}
381	158142c2	bellard
382	158142c2	bellard	/*----------------------------------------------------------------------------
383	158142c2	bellard	\| Returns the fraction bits of the double-precision floating-point value `a'.
384	158142c2	bellard	----------------------------------------------------------------------------/
385	158142c2	bellard
386	bb98fe42	Andreas Färber	INLINE uint64_t extractFloat64Frac( float64 a )
387	158142c2	bellard	{
388	158142c2	bellard
389	f090c9d4	pbrook	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
390	158142c2	bellard
391	158142c2	bellard	}
392	158142c2	bellard
393	158142c2	bellard	/*----------------------------------------------------------------------------
394	158142c2	bellard	\| Returns the exponent bits of the double-precision floating-point value `a'.
395	158142c2	bellard	----------------------------------------------------------------------------/
396	158142c2	bellard
397	158142c2	bellard	INLINE int16 extractFloat64Exp( float64 a )
398	158142c2	bellard	{
399	158142c2	bellard
400	f090c9d4	pbrook	return ( float64_val(a)>>52 ) & 0x7FF;
401	158142c2	bellard
402	158142c2	bellard	}
403	158142c2	bellard
404	158142c2	bellard	/*----------------------------------------------------------------------------
405	158142c2	bellard	\| Returns the sign bit of the double-precision floating-point value `a'.
406	158142c2	bellard	----------------------------------------------------------------------------/
407	158142c2	bellard
408	158142c2	bellard	INLINE flag extractFloat64Sign( float64 a )
409	158142c2	bellard	{
410	158142c2	bellard
411	f090c9d4	pbrook	return float64_val(a)>>63;
412	158142c2	bellard
413	158142c2	bellard	}
414	158142c2	bellard
415	158142c2	bellard	/*----------------------------------------------------------------------------
416	37d18660	Peter Maydell	\| If `a' is denormal and we are in flush-to-zero mode then set the
417	37d18660	Peter Maydell	\| input-denormal exception and return zero. Otherwise just return the value.
418	37d18660	Peter Maydell	----------------------------------------------------------------------------/
419	37d18660	Peter Maydell	static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
420	37d18660	Peter Maydell	{
421	37d18660	Peter Maydell	if (STATUS(flush_inputs_to_zero)) {
422	37d18660	Peter Maydell	if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
423	37d18660	Peter Maydell	float_raise(float_flag_input_denormal STATUS_VAR);
424	37d18660	Peter Maydell	return make_float64(float64_val(a) & (1ULL << 63));
425	37d18660	Peter Maydell	}
426	37d18660	Peter Maydell	}
427	37d18660	Peter Maydell	return a;
428	37d18660	Peter Maydell	}
429	37d18660	Peter Maydell
430	37d18660	Peter Maydell	/*----------------------------------------------------------------------------
431	158142c2	bellard	\| Normalizes the subnormal double-precision floating-point value represented
432	158142c2	bellard	\| by the denormalized significand `aSig'. The normalized exponent and
433	158142c2	bellard	\| significand are stored at the locations pointed to by `zExpPtr' and
434	158142c2	bellard	\| `zSigPtr', respectively.
435	158142c2	bellard	----------------------------------------------------------------------------/
436	158142c2	bellard
437	158142c2	bellard	static void
438	bb98fe42	Andreas Färber	normalizeFloat64Subnormal( uint64_t aSig, int16 zExpPtr, uint64_t zSigPtr )
439	158142c2	bellard	{
440	158142c2	bellard	int8 shiftCount;
441	158142c2	bellard
442	158142c2	bellard	shiftCount = countLeadingZeros64( aSig ) - 11;
443	158142c2	bellard	*zSigPtr = aSig<<shiftCount;
444	158142c2	bellard	*zExpPtr = 1 - shiftCount;
445	158142c2	bellard
446	158142c2	bellard	}
447	158142c2	bellard
448	158142c2	bellard	/*----------------------------------------------------------------------------
449	158142c2	bellard	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
450	158142c2	bellard	\| double-precision floating-point value, returning the result. After being
451	158142c2	bellard	\| shifted into the proper positions, the three fields are simply added
452	158142c2	bellard	\| together to form the result. This means that any integer portion of `zSig'
453	158142c2	bellard	\| will be added into the exponent. Since a properly normalized significand
454	158142c2	bellard	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
455	158142c2	bellard	\| than the desired result exponent whenever `zSig' is a complete, normalized
456	158142c2	bellard	\| significand.
457	158142c2	bellard	----------------------------------------------------------------------------/
458	158142c2	bellard
459	bb98fe42	Andreas Färber	INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
460	158142c2	bellard	{
461	158142c2	bellard
462	f090c9d4	pbrook	return make_float64(
463	bb98fe42	Andreas Färber	( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
464	158142c2	bellard
465	158142c2	bellard	}
466	158142c2	bellard
467	158142c2	bellard	/*----------------------------------------------------------------------------
468	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
469	158142c2	bellard	\| and significand `zSig', and returns the proper double-precision floating-
470	158142c2	bellard	\| point value corresponding to the abstract input. Ordinarily, the abstract
471	158142c2	bellard	\| value is simply rounded and packed into the double-precision format, with
472	158142c2	bellard	\| the inexact exception raised if the abstract input cannot be represented
473	158142c2	bellard	\| exactly. However, if the abstract value is too large, the overflow and
474	158142c2	bellard	\| inexact exceptions are raised and an infinity or maximal finite value is
475	158142c2	bellard	\| returned. If the abstract value is too small, the input value is rounded
476	158142c2	bellard	\| to a subnormal number, and the underflow and inexact exceptions are raised
477	158142c2	bellard	\| if the abstract input cannot be represented exactly as a subnormal double-
478	158142c2	bellard	\| precision floating-point number.
479	158142c2	bellard	\| The input significand `zSig' has its binary point between bits 62
480	158142c2	bellard	\| and 61, which is 10 bits to the left of the usual location. This shifted
481	158142c2	bellard	\| significand must be normalized or smaller. If `zSig' is not normalized,
482	158142c2	bellard	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
483	158142c2	bellard	\| and it must not require rounding. In the usual case that `zSig' is
484	158142c2	bellard	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
485	158142c2	bellard	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
486	158142c2	bellard	\| Binary Floating-Point Arithmetic.
487	158142c2	bellard	----------------------------------------------------------------------------/
488	158142c2	bellard
489	bb98fe42	Andreas Färber	static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
490	158142c2	bellard	{
491	158142c2	bellard	int8 roundingMode;
492	158142c2	bellard	flag roundNearestEven;
493	158142c2	bellard	int16 roundIncrement, roundBits;
494	158142c2	bellard	flag isTiny;
495	158142c2	bellard
496	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
497	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
498	158142c2	bellard	roundIncrement = 0x200;
499	158142c2	bellard	if ( ! roundNearestEven ) {
500	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
501	158142c2	bellard	roundIncrement = 0;
502	158142c2	bellard	}
503	158142c2	bellard	else {
504	158142c2	bellard	roundIncrement = 0x3FF;
505	158142c2	bellard	if ( zSign ) {
506	158142c2	bellard	if ( roundingMode == float_round_up ) roundIncrement = 0;
507	158142c2	bellard	}
508	158142c2	bellard	else {
509	158142c2	bellard	if ( roundingMode == float_round_down ) roundIncrement = 0;
510	158142c2	bellard	}
511	158142c2	bellard	}
512	158142c2	bellard	}
513	158142c2	bellard	roundBits = zSig & 0x3FF;
514	bb98fe42	Andreas Färber	if ( 0x7FD <= (uint16_t) zExp ) {
515	158142c2	bellard	if ( ( 0x7FD < zExp )
516	158142c2	bellard	\|\| ( ( zExp == 0x7FD )
517	bb98fe42	Andreas Färber	&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
518	158142c2	bellard	) {
519	158142c2	bellard	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
520	f090c9d4	pbrook	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
521	158142c2	bellard	}
522	158142c2	bellard	if ( zExp < 0 ) {
523	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
524	158142c2	bellard	isTiny =
525	158142c2	bellard	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
526	158142c2	bellard	\|\| ( zExp < -1 )
527	158142c2	bellard	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
528	158142c2	bellard	shift64RightJamming( zSig, - zExp, &zSig );
529	158142c2	bellard	zExp = 0;
530	158142c2	bellard	roundBits = zSig & 0x3FF;
531	158142c2	bellard	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
532	158142c2	bellard	}
533	158142c2	bellard	}
534	158142c2	bellard	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
535	158142c2	bellard	zSig = ( zSig + roundIncrement )>>10;
536	158142c2	bellard	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
537	158142c2	bellard	if ( zSig == 0 ) zExp = 0;
538	158142c2	bellard	return packFloat64( zSign, zExp, zSig );
539	158142c2	bellard
540	158142c2	bellard	}
541	158142c2	bellard
542	158142c2	bellard	/*----------------------------------------------------------------------------
543	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
544	158142c2	bellard	\| and significand `zSig', and returns the proper double-precision floating-
545	158142c2	bellard	\| point value corresponding to the abstract input. This routine is just like
546	158142c2	bellard	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
547	158142c2	bellard	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
548	158142c2	bellard	\| floating-point exponent.
549	158142c2	bellard	----------------------------------------------------------------------------/
550	158142c2	bellard
551	158142c2	bellard	static float64
552	bb98fe42	Andreas Färber	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
553	158142c2	bellard	{
554	158142c2	bellard	int8 shiftCount;
555	158142c2	bellard
556	158142c2	bellard	shiftCount = countLeadingZeros64( zSig ) - 1;
557	158142c2	bellard	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
558	158142c2	bellard
559	158142c2	bellard	}
560	158142c2	bellard
561	158142c2	bellard	#ifdef FLOATX80
562	158142c2	bellard
563	158142c2	bellard	/*----------------------------------------------------------------------------
564	158142c2	bellard	\| Returns the fraction bits of the extended double-precision floating-point
565	158142c2	bellard	\| value `a'.
566	158142c2	bellard	----------------------------------------------------------------------------/
567	158142c2	bellard
568	bb98fe42	Andreas Färber	INLINE uint64_t extractFloatx80Frac( floatx80 a )
569	158142c2	bellard	{
570	158142c2	bellard
571	158142c2	bellard	return a.low;
572	158142c2	bellard
573	158142c2	bellard	}
574	158142c2	bellard
575	158142c2	bellard	/*----------------------------------------------------------------------------
576	158142c2	bellard	\| Returns the exponent bits of the extended double-precision floating-point
577	158142c2	bellard	\| value `a'.
578	158142c2	bellard	----------------------------------------------------------------------------/
579	158142c2	bellard
580	158142c2	bellard	INLINE int32 extractFloatx80Exp( floatx80 a )
581	158142c2	bellard	{
582	158142c2	bellard
583	158142c2	bellard	return a.high & 0x7FFF;
584	158142c2	bellard
585	158142c2	bellard	}
586	158142c2	bellard
587	158142c2	bellard	/*----------------------------------------------------------------------------
588	158142c2	bellard	\| Returns the sign bit of the extended double-precision floating-point value
589	158142c2	bellard	\| `a'.
590	158142c2	bellard	----------------------------------------------------------------------------/
591	158142c2	bellard
592	158142c2	bellard	INLINE flag extractFloatx80Sign( floatx80 a )
593	158142c2	bellard	{
594	158142c2	bellard
595	158142c2	bellard	return a.high>>15;
596	158142c2	bellard
597	158142c2	bellard	}
598	158142c2	bellard
599	158142c2	bellard	/*----------------------------------------------------------------------------
600	158142c2	bellard	\| Normalizes the subnormal extended double-precision floating-point value
601	158142c2	bellard	\| represented by the denormalized significand `aSig'. The normalized exponent
602	158142c2	bellard	\| and significand are stored at the locations pointed to by `zExpPtr' and
603	158142c2	bellard	\| `zSigPtr', respectively.
604	158142c2	bellard	----------------------------------------------------------------------------/
605	158142c2	bellard
606	158142c2	bellard	static void
607	bb98fe42	Andreas Färber	normalizeFloatx80Subnormal( uint64_t aSig, int32 zExpPtr, uint64_t zSigPtr )
608	158142c2	bellard	{
609	158142c2	bellard	int8 shiftCount;
610	158142c2	bellard
611	158142c2	bellard	shiftCount = countLeadingZeros64( aSig );
612	158142c2	bellard	*zSigPtr = aSig<<shiftCount;
613	158142c2	bellard	*zExpPtr = 1 - shiftCount;
614	158142c2	bellard
615	158142c2	bellard	}
616	158142c2	bellard
617	158142c2	bellard	/*----------------------------------------------------------------------------
618	158142c2	bellard	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
619	158142c2	bellard	\| extended double-precision floating-point value, returning the result.
620	158142c2	bellard	----------------------------------------------------------------------------/
621	158142c2	bellard
622	bb98fe42	Andreas Färber	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
623	158142c2	bellard	{
624	158142c2	bellard	floatx80 z;
625	158142c2	bellard
626	158142c2	bellard	z.low = zSig;
627	bb98fe42	Andreas Färber	z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
628	158142c2	bellard	return z;
629	158142c2	bellard
630	158142c2	bellard	}
631	158142c2	bellard
632	158142c2	bellard	/*----------------------------------------------------------------------------
633	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
634	158142c2	bellard	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
635	158142c2	bellard	\| and returns the proper extended double-precision floating-point value
636	158142c2	bellard	\| corresponding to the abstract input. Ordinarily, the abstract value is
637	158142c2	bellard	\| rounded and packed into the extended double-precision format, with the
638	158142c2	bellard	\| inexact exception raised if the abstract input cannot be represented
639	158142c2	bellard	\| exactly. However, if the abstract value is too large, the overflow and
640	158142c2	bellard	\| inexact exceptions are raised and an infinity or maximal finite value is
641	158142c2	bellard	\| returned. If the abstract value is too small, the input value is rounded to
642	158142c2	bellard	\| a subnormal number, and the underflow and inexact exceptions are raised if
643	158142c2	bellard	\| the abstract input cannot be represented exactly as a subnormal extended
644	158142c2	bellard	\| double-precision floating-point number.
645	158142c2	bellard	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
646	158142c2	bellard	\| number of bits as single or double precision, respectively. Otherwise, the
647	158142c2	bellard	\| result is rounded to the full precision of the extended double-precision
648	158142c2	bellard	\| format.
649	158142c2	bellard	\| The input significand must be normalized or smaller. If the input
650	158142c2	bellard	\| significand is not normalized, `zExp' must be 0; in that case, the result
651	158142c2	bellard	\| returned is a subnormal number, and it must not require rounding. The
652	158142c2	bellard	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
653	158142c2	bellard	\| Floating-Point Arithmetic.
654	158142c2	bellard	----------------------------------------------------------------------------/
655	158142c2	bellard
656	158142c2	bellard	static floatx80
657	158142c2	bellard	roundAndPackFloatx80(
658	bb98fe42	Andreas Färber	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
659	158142c2	bellard	STATUS_PARAM)
660	158142c2	bellard	{
661	158142c2	bellard	int8 roundingMode;
662	158142c2	bellard	flag roundNearestEven, increment, isTiny;
663	158142c2	bellard	int64 roundIncrement, roundMask, roundBits;
664	158142c2	bellard
665	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
666	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
667	158142c2	bellard	if ( roundingPrecision == 80 ) goto precision80;
668	158142c2	bellard	if ( roundingPrecision == 64 ) {
669	158142c2	bellard	roundIncrement = LIT64( 0x0000000000000400 );
670	158142c2	bellard	roundMask = LIT64( 0x00000000000007FF );
671	158142c2	bellard	}
672	158142c2	bellard	else if ( roundingPrecision == 32 ) {
673	158142c2	bellard	roundIncrement = LIT64( 0x0000008000000000 );
674	158142c2	bellard	roundMask = LIT64( 0x000000FFFFFFFFFF );
675	158142c2	bellard	}
676	158142c2	bellard	else {
677	158142c2	bellard	goto precision80;
678	158142c2	bellard	}
679	158142c2	bellard	zSig0 \|= ( zSig1 != 0 );
680	158142c2	bellard	if ( ! roundNearestEven ) {
681	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
682	158142c2	bellard	roundIncrement = 0;
683	158142c2	bellard	}
684	158142c2	bellard	else {
685	158142c2	bellard	roundIncrement = roundMask;
686	158142c2	bellard	if ( zSign ) {
687	158142c2	bellard	if ( roundingMode == float_round_up ) roundIncrement = 0;
688	158142c2	bellard	}
689	158142c2	bellard	else {
690	158142c2	bellard	if ( roundingMode == float_round_down ) roundIncrement = 0;
691	158142c2	bellard	}
692	158142c2	bellard	}
693	158142c2	bellard	}
694	158142c2	bellard	roundBits = zSig0 & roundMask;
695	bb98fe42	Andreas Färber	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
696	158142c2	bellard	if ( ( 0x7FFE < zExp )
697	158142c2	bellard	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
698	158142c2	bellard	) {
699	158142c2	bellard	goto overflow;
700	158142c2	bellard	}
701	158142c2	bellard	if ( zExp <= 0 ) {
702	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
703	158142c2	bellard	isTiny =
704	158142c2	bellard	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
705	158142c2	bellard	\|\| ( zExp < 0 )
706	158142c2	bellard	\|\| ( zSig0 <= zSig0 + roundIncrement );
707	158142c2	bellard	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
708	158142c2	bellard	zExp = 0;
709	158142c2	bellard	roundBits = zSig0 & roundMask;
710	158142c2	bellard	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
711	158142c2	bellard	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
712	158142c2	bellard	zSig0 += roundIncrement;
713	bb98fe42	Andreas Färber	if ( (int64_t) zSig0 < 0 ) zExp = 1;
714	158142c2	bellard	roundIncrement = roundMask + 1;
715	158142c2	bellard	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
716	158142c2	bellard	roundMask \|= roundIncrement;
717	158142c2	bellard	}
718	158142c2	bellard	zSig0 &= ~ roundMask;
719	158142c2	bellard	return packFloatx80( zSign, zExp, zSig0 );
720	158142c2	bellard	}
721	158142c2	bellard	}
722	158142c2	bellard	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
723	158142c2	bellard	zSig0 += roundIncrement;
724	158142c2	bellard	if ( zSig0 < roundIncrement ) {
725	158142c2	bellard	++zExp;
726	158142c2	bellard	zSig0 = LIT64( 0x8000000000000000 );
727	158142c2	bellard	}
728	158142c2	bellard	roundIncrement = roundMask + 1;
729	158142c2	bellard	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
730	158142c2	bellard	roundMask \|= roundIncrement;
731	158142c2	bellard	}
732	158142c2	bellard	zSig0 &= ~ roundMask;
733	158142c2	bellard	if ( zSig0 == 0 ) zExp = 0;
734	158142c2	bellard	return packFloatx80( zSign, zExp, zSig0 );
735	158142c2	bellard	precision80:
736	bb98fe42	Andreas Färber	increment = ( (int64_t) zSig1 < 0 );
737	158142c2	bellard	if ( ! roundNearestEven ) {
738	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
739	158142c2	bellard	increment = 0;
740	158142c2	bellard	}
741	158142c2	bellard	else {
742	158142c2	bellard	if ( zSign ) {
743	158142c2	bellard	increment = ( roundingMode == float_round_down ) && zSig1;
744	158142c2	bellard	}
745	158142c2	bellard	else {
746	158142c2	bellard	increment = ( roundingMode == float_round_up ) && zSig1;
747	158142c2	bellard	}
748	158142c2	bellard	}
749	158142c2	bellard	}
750	bb98fe42	Andreas Färber	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
751	158142c2	bellard	if ( ( 0x7FFE < zExp )
752	158142c2	bellard	\|\| ( ( zExp == 0x7FFE )
753	158142c2	bellard	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
754	158142c2	bellard	&& increment
755	158142c2	bellard	)
756	158142c2	bellard	) {
757	158142c2	bellard	roundMask = 0;
758	158142c2	bellard	overflow:
759	158142c2	bellard	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
760	158142c2	bellard	if ( ( roundingMode == float_round_to_zero )
761	158142c2	bellard	\|\| ( zSign && ( roundingMode == float_round_up ) )
762	158142c2	bellard	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
763	158142c2	bellard	) {
764	158142c2	bellard	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
765	158142c2	bellard	}
766	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
767	158142c2	bellard	}
768	158142c2	bellard	if ( zExp <= 0 ) {
769	158142c2	bellard	isTiny =
770	158142c2	bellard	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
771	158142c2	bellard	\|\| ( zExp < 0 )
772	158142c2	bellard	\|\| ! increment
773	158142c2	bellard	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
774	158142c2	bellard	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
775	158142c2	bellard	zExp = 0;
776	158142c2	bellard	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
777	158142c2	bellard	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
778	158142c2	bellard	if ( roundNearestEven ) {
779	bb98fe42	Andreas Färber	increment = ( (int64_t) zSig1 < 0 );
780	158142c2	bellard	}
781	158142c2	bellard	else {
782	158142c2	bellard	if ( zSign ) {
783	158142c2	bellard	increment = ( roundingMode == float_round_down ) && zSig1;
784	158142c2	bellard	}
785	158142c2	bellard	else {
786	158142c2	bellard	increment = ( roundingMode == float_round_up ) && zSig1;
787	158142c2	bellard	}
788	158142c2	bellard	}
789	158142c2	bellard	if ( increment ) {
790	158142c2	bellard	++zSig0;
791	158142c2	bellard	zSig0 &=
792	bb98fe42	Andreas Färber	~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
793	bb98fe42	Andreas Färber	if ( (int64_t) zSig0 < 0 ) zExp = 1;
794	158142c2	bellard	}
795	158142c2	bellard	return packFloatx80( zSign, zExp, zSig0 );
796	158142c2	bellard	}
797	158142c2	bellard	}
798	158142c2	bellard	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
799	158142c2	bellard	if ( increment ) {
800	158142c2	bellard	++zSig0;
801	158142c2	bellard	if ( zSig0 == 0 ) {
802	158142c2	bellard	++zExp;
803	158142c2	bellard	zSig0 = LIT64( 0x8000000000000000 );
804	158142c2	bellard	}
805	158142c2	bellard	else {
806	bb98fe42	Andreas Färber	zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
807	158142c2	bellard	}
808	158142c2	bellard	}
809	158142c2	bellard	else {
810	158142c2	bellard	if ( zSig0 == 0 ) zExp = 0;
811	158142c2	bellard	}
812	158142c2	bellard	return packFloatx80( zSign, zExp, zSig0 );
813	158142c2	bellard
814	158142c2	bellard	}
815	158142c2	bellard
816	158142c2	bellard	/*----------------------------------------------------------------------------
817	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent
818	158142c2	bellard	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
819	158142c2	bellard	\| and returns the proper extended double-precision floating-point value
820	158142c2	bellard	\| corresponding to the abstract input. This routine is just like
821	158142c2	bellard	\| `roundAndPackFloatx80' except that the input significand does not have to be
822	158142c2	bellard	\| normalized.
823	158142c2	bellard	----------------------------------------------------------------------------/
824	158142c2	bellard
825	158142c2	bellard	static floatx80
826	158142c2	bellard	normalizeRoundAndPackFloatx80(
827	bb98fe42	Andreas Färber	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
828	158142c2	bellard	STATUS_PARAM)
829	158142c2	bellard	{
830	158142c2	bellard	int8 shiftCount;
831	158142c2	bellard
832	158142c2	bellard	if ( zSig0 == 0 ) {
833	158142c2	bellard	zSig0 = zSig1;
834	158142c2	bellard	zSig1 = 0;
835	158142c2	bellard	zExp -= 64;
836	158142c2	bellard	}
837	158142c2	bellard	shiftCount = countLeadingZeros64( zSig0 );
838	158142c2	bellard	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
839	158142c2	bellard	zExp -= shiftCount;
840	158142c2	bellard	return
841	158142c2	bellard	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
842	158142c2	bellard
843	158142c2	bellard	}
844	158142c2	bellard
845	158142c2	bellard	#endif
846	158142c2	bellard
847	158142c2	bellard	#ifdef FLOAT128
848	158142c2	bellard
849	158142c2	bellard	/*----------------------------------------------------------------------------
850	158142c2	bellard	\| Returns the least-significant 64 fraction bits of the quadruple-precision
851	158142c2	bellard	\| floating-point value `a'.
852	158142c2	bellard	----------------------------------------------------------------------------/
853	158142c2	bellard
854	bb98fe42	Andreas Färber	INLINE uint64_t extractFloat128Frac1( float128 a )
855	158142c2	bellard	{
856	158142c2	bellard
857	158142c2	bellard	return a.low;
858	158142c2	bellard
859	158142c2	bellard	}
860	158142c2	bellard
861	158142c2	bellard	/*----------------------------------------------------------------------------
862	158142c2	bellard	\| Returns the most-significant 48 fraction bits of the quadruple-precision
863	158142c2	bellard	\| floating-point value `a'.
864	158142c2	bellard	----------------------------------------------------------------------------/
865	158142c2	bellard
866	bb98fe42	Andreas Färber	INLINE uint64_t extractFloat128Frac0( float128 a )
867	158142c2	bellard	{
868	158142c2	bellard
869	158142c2	bellard	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
870	158142c2	bellard
871	158142c2	bellard	}
872	158142c2	bellard
873	158142c2	bellard	/*----------------------------------------------------------------------------
874	158142c2	bellard	\| Returns the exponent bits of the quadruple-precision floating-point value
875	158142c2	bellard	\| `a'.
876	158142c2	bellard	----------------------------------------------------------------------------/
877	158142c2	bellard
878	158142c2	bellard	INLINE int32 extractFloat128Exp( float128 a )
879	158142c2	bellard	{
880	158142c2	bellard
881	158142c2	bellard	return ( a.high>>48 ) & 0x7FFF;
882	158142c2	bellard
883	158142c2	bellard	}
884	158142c2	bellard
885	158142c2	bellard	/*----------------------------------------------------------------------------
886	158142c2	bellard	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
887	158142c2	bellard	----------------------------------------------------------------------------/
888	158142c2	bellard
889	158142c2	bellard	INLINE flag extractFloat128Sign( float128 a )
890	158142c2	bellard	{
891	158142c2	bellard
892	158142c2	bellard	return a.high>>63;
893	158142c2	bellard
894	158142c2	bellard	}
895	158142c2	bellard
896	158142c2	bellard	/*----------------------------------------------------------------------------
897	158142c2	bellard	\| Normalizes the subnormal quadruple-precision floating-point value
898	158142c2	bellard	\| represented by the denormalized significand formed by the concatenation of
899	158142c2	bellard	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
900	158142c2	bellard	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
901	158142c2	bellard	\| significand are stored at the location pointed to by `zSig0Ptr', and the
902	158142c2	bellard	\| least significant 64 bits of the normalized significand are stored at the
903	158142c2	bellard	\| location pointed to by `zSig1Ptr'.
904	158142c2	bellard	----------------------------------------------------------------------------/
905	158142c2	bellard
906	158142c2	bellard	static void
907	158142c2	bellard	normalizeFloat128Subnormal(
908	bb98fe42	Andreas Färber	uint64_t aSig0,
909	bb98fe42	Andreas Färber	uint64_t aSig1,
910	158142c2	bellard	int32 *zExpPtr,
911	bb98fe42	Andreas Färber	uint64_t *zSig0Ptr,
912	bb98fe42	Andreas Färber	uint64_t *zSig1Ptr
913	158142c2	bellard	)
914	158142c2	bellard	{
915	158142c2	bellard	int8 shiftCount;
916	158142c2	bellard
917	158142c2	bellard	if ( aSig0 == 0 ) {
918	158142c2	bellard	shiftCount = countLeadingZeros64( aSig1 ) - 15;
919	158142c2	bellard	if ( shiftCount < 0 ) {
920	158142c2	bellard	*zSig0Ptr = aSig1>>( - shiftCount );
921	158142c2	bellard	*zSig1Ptr = aSig1<<( shiftCount & 63 );
922	158142c2	bellard	}
923	158142c2	bellard	else {
924	158142c2	bellard	*zSig0Ptr = aSig1<<shiftCount;
925	158142c2	bellard	*zSig1Ptr = 0;
926	158142c2	bellard	}
927	158142c2	bellard	*zExpPtr = - shiftCount - 63;
928	158142c2	bellard	}
929	158142c2	bellard	else {
930	158142c2	bellard	shiftCount = countLeadingZeros64( aSig0 ) - 15;
931	158142c2	bellard	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
932	158142c2	bellard	*zExpPtr = 1 - shiftCount;
933	158142c2	bellard	}
934	158142c2	bellard
935	158142c2	bellard	}
936	158142c2	bellard
937	158142c2	bellard	/*----------------------------------------------------------------------------
938	158142c2	bellard	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
939	158142c2	bellard	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
940	158142c2	bellard	\| floating-point value, returning the result. After being shifted into the
941	158142c2	bellard	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
942	158142c2	bellard	\| added together to form the most significant 32 bits of the result. This
943	158142c2	bellard	\| means that any integer portion of `zSig0' will be added into the exponent.
944	158142c2	bellard	\| Since a properly normalized significand will have an integer portion equal
945	158142c2	bellard	\| to 1, the `zExp' input should be 1 less than the desired result exponent
946	158142c2	bellard	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
947	158142c2	bellard	\| significand.
948	158142c2	bellard	----------------------------------------------------------------------------/
949	158142c2	bellard
950	158142c2	bellard	INLINE float128
951	bb98fe42	Andreas Färber	packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
952	158142c2	bellard	{
953	158142c2	bellard	float128 z;
954	158142c2	bellard
955	158142c2	bellard	z.low = zSig1;
956	bb98fe42	Andreas Färber	z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
957	158142c2	bellard	return z;
958	158142c2	bellard
959	158142c2	bellard	}
960	158142c2	bellard
961	158142c2	bellard	/*----------------------------------------------------------------------------
962	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
963	158142c2	bellard	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
964	158142c2	bellard	\| and `zSig2', and returns the proper quadruple-precision floating-point value
965	158142c2	bellard	\| corresponding to the abstract input. Ordinarily, the abstract value is
966	158142c2	bellard	\| simply rounded and packed into the quadruple-precision format, with the
967	158142c2	bellard	\| inexact exception raised if the abstract input cannot be represented
968	158142c2	bellard	\| exactly. However, if the abstract value is too large, the overflow and
969	158142c2	bellard	\| inexact exceptions are raised and an infinity or maximal finite value is
970	158142c2	bellard	\| returned. If the abstract value is too small, the input value is rounded to
971	158142c2	bellard	\| a subnormal number, and the underflow and inexact exceptions are raised if
972	158142c2	bellard	\| the abstract input cannot be represented exactly as a subnormal quadruple-
973	158142c2	bellard	\| precision floating-point number.
974	158142c2	bellard	\| The input significand must be normalized or smaller. If the input
975	158142c2	bellard	\| significand is not normalized, `zExp' must be 0; in that case, the result
976	158142c2	bellard	\| returned is a subnormal number, and it must not require rounding. In the
977	158142c2	bellard	\| usual case that the input significand is normalized, `zExp' must be 1 less
978	158142c2	bellard	\| than the ``true'' floating-point exponent. The handling of underflow and
979	158142c2	bellard	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
980	158142c2	bellard	----------------------------------------------------------------------------/
981	158142c2	bellard
982	158142c2	bellard	static float128
983	158142c2	bellard	roundAndPackFloat128(
984	bb98fe42	Andreas Färber	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
985	158142c2	bellard	{
986	158142c2	bellard	int8 roundingMode;
987	158142c2	bellard	flag roundNearestEven, increment, isTiny;
988	158142c2	bellard
989	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
990	158142c2	bellard	roundNearestEven = ( roundingMode == float_round_nearest_even );
991	bb98fe42	Andreas Färber	increment = ( (int64_t) zSig2 < 0 );
992	158142c2	bellard	if ( ! roundNearestEven ) {
993	158142c2	bellard	if ( roundingMode == float_round_to_zero ) {
994	158142c2	bellard	increment = 0;
995	158142c2	bellard	}
996	158142c2	bellard	else {
997	158142c2	bellard	if ( zSign ) {
998	158142c2	bellard	increment = ( roundingMode == float_round_down ) && zSig2;
999	158142c2	bellard	}
1000	158142c2	bellard	else {
1001	158142c2	bellard	increment = ( roundingMode == float_round_up ) && zSig2;
1002	158142c2	bellard	}
1003	158142c2	bellard	}
1004	158142c2	bellard	}
1005	bb98fe42	Andreas Färber	if ( 0x7FFD <= (uint32_t) zExp ) {
1006	158142c2	bellard	if ( ( 0x7FFD < zExp )
1007	158142c2	bellard	\|\| ( ( zExp == 0x7FFD )
1008	158142c2	bellard	&& eq128(
1009	158142c2	bellard	LIT64( 0x0001FFFFFFFFFFFF ),
1010	158142c2	bellard	LIT64( 0xFFFFFFFFFFFFFFFF ),
1011	158142c2	bellard	zSig0,
1012	158142c2	bellard	zSig1
1013	158142c2	bellard	)
1014	158142c2	bellard	&& increment
1015	158142c2	bellard	)
1016	158142c2	bellard	) {
1017	158142c2	bellard	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
1018	158142c2	bellard	if ( ( roundingMode == float_round_to_zero )
1019	158142c2	bellard	\|\| ( zSign && ( roundingMode == float_round_up ) )
1020	158142c2	bellard	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
1021	158142c2	bellard	) {
1022	158142c2	bellard	return
1023	158142c2	bellard	packFloat128(
1024	158142c2	bellard	zSign,
1025	158142c2	bellard	0x7FFE,
1026	158142c2	bellard	LIT64( 0x0000FFFFFFFFFFFF ),
1027	158142c2	bellard	LIT64( 0xFFFFFFFFFFFFFFFF )
1028	158142c2	bellard	);
1029	158142c2	bellard	}
1030	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
1031	158142c2	bellard	}
1032	158142c2	bellard	if ( zExp < 0 ) {
1033	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
1034	158142c2	bellard	isTiny =
1035	158142c2	bellard	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1036	158142c2	bellard	\|\| ( zExp < -1 )
1037	158142c2	bellard	\|\| ! increment
1038	158142c2	bellard	\|\| lt128(
1039	158142c2	bellard	zSig0,
1040	158142c2	bellard	zSig1,
1041	158142c2	bellard	LIT64( 0x0001FFFFFFFFFFFF ),
1042	158142c2	bellard	LIT64( 0xFFFFFFFFFFFFFFFF )
1043	158142c2	bellard	);
1044	158142c2	bellard	shift128ExtraRightJamming(
1045	158142c2	bellard	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1046	158142c2	bellard	zExp = 0;
1047	158142c2	bellard	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1048	158142c2	bellard	if ( roundNearestEven ) {
1049	bb98fe42	Andreas Färber	increment = ( (int64_t) zSig2 < 0 );
1050	158142c2	bellard	}
1051	158142c2	bellard	else {
1052	158142c2	bellard	if ( zSign ) {
1053	158142c2	bellard	increment = ( roundingMode == float_round_down ) && zSig2;
1054	158142c2	bellard	}
1055	158142c2	bellard	else {
1056	158142c2	bellard	increment = ( roundingMode == float_round_up ) && zSig2;
1057	158142c2	bellard	}
1058	158142c2	bellard	}
1059	158142c2	bellard	}
1060	158142c2	bellard	}
1061	158142c2	bellard	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
1062	158142c2	bellard	if ( increment ) {
1063	158142c2	bellard	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1064	158142c2	bellard	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1065	158142c2	bellard	}
1066	158142c2	bellard	else {
1067	158142c2	bellard	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
1068	158142c2	bellard	}
1069	158142c2	bellard	return packFloat128( zSign, zExp, zSig0, zSig1 );
1070	158142c2	bellard
1071	158142c2	bellard	}
1072	158142c2	bellard
1073	158142c2	bellard	/*----------------------------------------------------------------------------
1074	158142c2	bellard	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1075	158142c2	bellard	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
1076	158142c2	bellard	\| returns the proper quadruple-precision floating-point value corresponding
1077	158142c2	bellard	\| to the abstract input. This routine is just like `roundAndPackFloat128'
1078	158142c2	bellard	\| except that the input significand has fewer bits and does not have to be
1079	158142c2	bellard	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1080	158142c2	bellard	\| point exponent.
1081	158142c2	bellard	----------------------------------------------------------------------------/
1082	158142c2	bellard
1083	158142c2	bellard	static float128
1084	158142c2	bellard	normalizeRoundAndPackFloat128(
1085	bb98fe42	Andreas Färber	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1086	158142c2	bellard	{
1087	158142c2	bellard	int8 shiftCount;
1088	bb98fe42	Andreas Färber	uint64_t zSig2;
1089	158142c2	bellard
1090	158142c2	bellard	if ( zSig0 == 0 ) {
1091	158142c2	bellard	zSig0 = zSig1;
1092	158142c2	bellard	zSig1 = 0;
1093	158142c2	bellard	zExp -= 64;
1094	158142c2	bellard	}
1095	158142c2	bellard	shiftCount = countLeadingZeros64( zSig0 ) - 15;
1096	158142c2	bellard	if ( 0 <= shiftCount ) {
1097	158142c2	bellard	zSig2 = 0;
1098	158142c2	bellard	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1099	158142c2	bellard	}
1100	158142c2	bellard	else {
1101	158142c2	bellard	shift128ExtraRightJamming(
1102	158142c2	bellard	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1103	158142c2	bellard	}
1104	158142c2	bellard	zExp -= shiftCount;
1105	158142c2	bellard	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1106	158142c2	bellard
1107	158142c2	bellard	}
1108	158142c2	bellard
1109	158142c2	bellard	#endif
1110	158142c2	bellard
1111	158142c2	bellard	/*----------------------------------------------------------------------------
1112	158142c2	bellard	\| Returns the result of converting the 32-bit two's complement integer `a'
1113	158142c2	bellard	\| to the single-precision floating-point format. The conversion is performed
1114	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1115	158142c2	bellard	----------------------------------------------------------------------------/
1116	158142c2	bellard
1117	158142c2	bellard	float32 int32_to_float32( int32 a STATUS_PARAM )
1118	158142c2	bellard	{
1119	158142c2	bellard	flag zSign;
1120	158142c2	bellard
1121	f090c9d4	pbrook	if ( a == 0 ) return float32_zero;
1122	bb98fe42	Andreas Färber	if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1123	158142c2	bellard	zSign = ( a < 0 );
1124	158142c2	bellard	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1125	158142c2	bellard
1126	158142c2	bellard	}
1127	158142c2	bellard
1128	158142c2	bellard	/*----------------------------------------------------------------------------
1129	158142c2	bellard	\| Returns the result of converting the 32-bit two's complement integer `a'
1130	158142c2	bellard	\| to the double-precision floating-point format. The conversion is performed
1131	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1132	158142c2	bellard	----------------------------------------------------------------------------/
1133	158142c2	bellard
1134	158142c2	bellard	float64 int32_to_float64( int32 a STATUS_PARAM )
1135	158142c2	bellard	{
1136	158142c2	bellard	flag zSign;
1137	158142c2	bellard	uint32 absA;
1138	158142c2	bellard	int8 shiftCount;
1139	bb98fe42	Andreas Färber	uint64_t zSig;
1140	158142c2	bellard
1141	f090c9d4	pbrook	if ( a == 0 ) return float64_zero;
1142	158142c2	bellard	zSign = ( a < 0 );
1143	158142c2	bellard	absA = zSign ? - a : a;
1144	158142c2	bellard	shiftCount = countLeadingZeros32( absA ) + 21;
1145	158142c2	bellard	zSig = absA;
1146	158142c2	bellard	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1147	158142c2	bellard
1148	158142c2	bellard	}
1149	158142c2	bellard
1150	158142c2	bellard	#ifdef FLOATX80
1151	158142c2	bellard
1152	158142c2	bellard	/*----------------------------------------------------------------------------
1153	158142c2	bellard	\| Returns the result of converting the 32-bit two's complement integer `a'
1154	158142c2	bellard	\| to the extended double-precision floating-point format. The conversion
1155	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1156	158142c2	bellard	\| Arithmetic.
1157	158142c2	bellard	----------------------------------------------------------------------------/
1158	158142c2	bellard
1159	158142c2	bellard	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1160	158142c2	bellard	{
1161	158142c2	bellard	flag zSign;
1162	158142c2	bellard	uint32 absA;
1163	158142c2	bellard	int8 shiftCount;
1164	bb98fe42	Andreas Färber	uint64_t zSig;
1165	158142c2	bellard
1166	158142c2	bellard	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1167	158142c2	bellard	zSign = ( a < 0 );
1168	158142c2	bellard	absA = zSign ? - a : a;
1169	158142c2	bellard	shiftCount = countLeadingZeros32( absA ) + 32;
1170	158142c2	bellard	zSig = absA;
1171	158142c2	bellard	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1172	158142c2	bellard
1173	158142c2	bellard	}
1174	158142c2	bellard
1175	158142c2	bellard	#endif
1176	158142c2	bellard
1177	158142c2	bellard	#ifdef FLOAT128
1178	158142c2	bellard
1179	158142c2	bellard	/*----------------------------------------------------------------------------
1180	158142c2	bellard	\| Returns the result of converting the 32-bit two's complement integer `a' to
1181	158142c2	bellard	\| the quadruple-precision floating-point format. The conversion is performed
1182	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1183	158142c2	bellard	----------------------------------------------------------------------------/
1184	158142c2	bellard
1185	158142c2	bellard	float128 int32_to_float128( int32 a STATUS_PARAM )
1186	158142c2	bellard	{
1187	158142c2	bellard	flag zSign;
1188	158142c2	bellard	uint32 absA;
1189	158142c2	bellard	int8 shiftCount;
1190	bb98fe42	Andreas Färber	uint64_t zSig0;
1191	158142c2	bellard
1192	158142c2	bellard	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1193	158142c2	bellard	zSign = ( a < 0 );
1194	158142c2	bellard	absA = zSign ? - a : a;
1195	158142c2	bellard	shiftCount = countLeadingZeros32( absA ) + 17;
1196	158142c2	bellard	zSig0 = absA;
1197	158142c2	bellard	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1198	158142c2	bellard
1199	158142c2	bellard	}
1200	158142c2	bellard
1201	158142c2	bellard	#endif
1202	158142c2	bellard
1203	158142c2	bellard	/*----------------------------------------------------------------------------
1204	158142c2	bellard	\| Returns the result of converting the 64-bit two's complement integer `a'
1205	158142c2	bellard	\| to the single-precision floating-point format. The conversion is performed
1206	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1207	158142c2	bellard	----------------------------------------------------------------------------/
1208	158142c2	bellard
1209	158142c2	bellard	float32 int64_to_float32( int64 a STATUS_PARAM )
1210	158142c2	bellard	{
1211	158142c2	bellard	flag zSign;
1212	158142c2	bellard	uint64 absA;
1213	158142c2	bellard	int8 shiftCount;
1214	158142c2	bellard
1215	f090c9d4	pbrook	if ( a == 0 ) return float32_zero;
1216	158142c2	bellard	zSign = ( a < 0 );
1217	158142c2	bellard	absA = zSign ? - a : a;
1218	158142c2	bellard	shiftCount = countLeadingZeros64( absA ) - 40;
1219	158142c2	bellard	if ( 0 <= shiftCount ) {
1220	158142c2	bellard	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1221	158142c2	bellard	}
1222	158142c2	bellard	else {
1223	158142c2	bellard	shiftCount += 7;
1224	158142c2	bellard	if ( shiftCount < 0 ) {
1225	158142c2	bellard	shift64RightJamming( absA, - shiftCount, &absA );
1226	158142c2	bellard	}
1227	158142c2	bellard	else {
1228	158142c2	bellard	absA <<= shiftCount;
1229	158142c2	bellard	}
1230	158142c2	bellard	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1231	158142c2	bellard	}
1232	158142c2	bellard
1233	158142c2	bellard	}
1234	158142c2	bellard
1235	3430b0be	j_mayer	float32 uint64_to_float32( uint64 a STATUS_PARAM )
1236	75d62a58	j_mayer	{
1237	75d62a58	j_mayer	int8 shiftCount;
1238	75d62a58	j_mayer
1239	f090c9d4	pbrook	if ( a == 0 ) return float32_zero;
1240	75d62a58	j_mayer	shiftCount = countLeadingZeros64( a ) - 40;
1241	75d62a58	j_mayer	if ( 0 <= shiftCount ) {
1242	75d62a58	j_mayer	return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
1243	75d62a58	j_mayer	}
1244	75d62a58	j_mayer	else {
1245	75d62a58	j_mayer	shiftCount += 7;
1246	75d62a58	j_mayer	if ( shiftCount < 0 ) {
1247	75d62a58	j_mayer	shift64RightJamming( a, - shiftCount, &a );
1248	75d62a58	j_mayer	}
1249	75d62a58	j_mayer	else {
1250	75d62a58	j_mayer	a <<= shiftCount;
1251	75d62a58	j_mayer	}
1252	75d62a58	j_mayer	return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
1253	75d62a58	j_mayer	}
1254	75d62a58	j_mayer	}
1255	75d62a58	j_mayer
1256	158142c2	bellard	/*----------------------------------------------------------------------------
1257	158142c2	bellard	\| Returns the result of converting the 64-bit two's complement integer `a'
1258	158142c2	bellard	\| to the double-precision floating-point format. The conversion is performed
1259	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1260	158142c2	bellard	----------------------------------------------------------------------------/
1261	158142c2	bellard
1262	158142c2	bellard	float64 int64_to_float64( int64 a STATUS_PARAM )
1263	158142c2	bellard	{
1264	158142c2	bellard	flag zSign;
1265	158142c2	bellard
1266	f090c9d4	pbrook	if ( a == 0 ) return float64_zero;
1267	bb98fe42	Andreas Färber	if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1268	158142c2	bellard	return packFloat64( 1, 0x43E, 0 );
1269	158142c2	bellard	}
1270	158142c2	bellard	zSign = ( a < 0 );
1271	158142c2	bellard	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1272	158142c2	bellard
1273	158142c2	bellard	}
1274	158142c2	bellard
1275	75d62a58	j_mayer	float64 uint64_to_float64( uint64 a STATUS_PARAM )
1276	75d62a58	j_mayer	{
1277	f090c9d4	pbrook	if ( a == 0 ) return float64_zero;
1278	75d62a58	j_mayer	return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
1279	75d62a58	j_mayer
1280	75d62a58	j_mayer	}
1281	75d62a58	j_mayer
1282	158142c2	bellard	#ifdef FLOATX80
1283	158142c2	bellard
1284	158142c2	bellard	/*----------------------------------------------------------------------------
1285	158142c2	bellard	\| Returns the result of converting the 64-bit two's complement integer `a'
1286	158142c2	bellard	\| to the extended double-precision floating-point format. The conversion
1287	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1288	158142c2	bellard	\| Arithmetic.
1289	158142c2	bellard	----------------------------------------------------------------------------/
1290	158142c2	bellard
1291	158142c2	bellard	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1292	158142c2	bellard	{
1293	158142c2	bellard	flag zSign;
1294	158142c2	bellard	uint64 absA;
1295	158142c2	bellard	int8 shiftCount;
1296	158142c2	bellard
1297	158142c2	bellard	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1298	158142c2	bellard	zSign = ( a < 0 );
1299	158142c2	bellard	absA = zSign ? - a : a;
1300	158142c2	bellard	shiftCount = countLeadingZeros64( absA );
1301	158142c2	bellard	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1302	158142c2	bellard
1303	158142c2	bellard	}
1304	158142c2	bellard
1305	158142c2	bellard	#endif
1306	158142c2	bellard
1307	158142c2	bellard	#ifdef FLOAT128
1308	158142c2	bellard
1309	158142c2	bellard	/*----------------------------------------------------------------------------
1310	158142c2	bellard	\| Returns the result of converting the 64-bit two's complement integer `a' to
1311	158142c2	bellard	\| the quadruple-precision floating-point format. The conversion is performed
1312	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1313	158142c2	bellard	----------------------------------------------------------------------------/
1314	158142c2	bellard
1315	158142c2	bellard	float128 int64_to_float128( int64 a STATUS_PARAM )
1316	158142c2	bellard	{
1317	158142c2	bellard	flag zSign;
1318	158142c2	bellard	uint64 absA;
1319	158142c2	bellard	int8 shiftCount;
1320	158142c2	bellard	int32 zExp;
1321	bb98fe42	Andreas Färber	uint64_t zSig0, zSig1;
1322	158142c2	bellard
1323	158142c2	bellard	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1324	158142c2	bellard	zSign = ( a < 0 );
1325	158142c2	bellard	absA = zSign ? - a : a;
1326	158142c2	bellard	shiftCount = countLeadingZeros64( absA ) + 49;
1327	158142c2	bellard	zExp = 0x406E - shiftCount;
1328	158142c2	bellard	if ( 64 <= shiftCount ) {
1329	158142c2	bellard	zSig1 = 0;
1330	158142c2	bellard	zSig0 = absA;
1331	158142c2	bellard	shiftCount -= 64;
1332	158142c2	bellard	}
1333	158142c2	bellard	else {
1334	158142c2	bellard	zSig1 = absA;
1335	158142c2	bellard	zSig0 = 0;
1336	158142c2	bellard	}
1337	158142c2	bellard	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1338	158142c2	bellard	return packFloat128( zSign, zExp, zSig0, zSig1 );
1339	158142c2	bellard
1340	158142c2	bellard	}
1341	158142c2	bellard
1342	158142c2	bellard	#endif
1343	158142c2	bellard
1344	158142c2	bellard	/*----------------------------------------------------------------------------
1345	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1346	158142c2	bellard	\| `a' to the 32-bit two's complement integer format. The conversion is
1347	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1348	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
1349	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
1350	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
1351	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
1352	158142c2	bellard	----------------------------------------------------------------------------/
1353	158142c2	bellard
1354	158142c2	bellard	int32 float32_to_int32( float32 a STATUS_PARAM )
1355	158142c2	bellard	{
1356	158142c2	bellard	flag aSign;
1357	158142c2	bellard	int16 aExp, shiftCount;
1358	bb98fe42	Andreas Färber	uint32_t aSig;
1359	bb98fe42	Andreas Färber	uint64_t aSig64;
1360	158142c2	bellard
1361	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1362	158142c2	bellard	aSig = extractFloat32Frac( a );
1363	158142c2	bellard	aExp = extractFloat32Exp( a );
1364	158142c2	bellard	aSign = extractFloat32Sign( a );
1365	158142c2	bellard	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1366	158142c2	bellard	if ( aExp ) aSig \|= 0x00800000;
1367	158142c2	bellard	shiftCount = 0xAF - aExp;
1368	158142c2	bellard	aSig64 = aSig;
1369	158142c2	bellard	aSig64 <<= 32;
1370	158142c2	bellard	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1371	158142c2	bellard	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1372	158142c2	bellard
1373	158142c2	bellard	}
1374	158142c2	bellard
1375	158142c2	bellard	/*----------------------------------------------------------------------------
1376	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1377	158142c2	bellard	\| `a' to the 32-bit two's complement integer format. The conversion is
1378	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1379	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero.
1380	158142c2	bellard	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1381	158142c2	bellard	\| the conversion overflows, the largest integer with the same sign as `a' is
1382	158142c2	bellard	\| returned.
1383	158142c2	bellard	----------------------------------------------------------------------------/
1384	158142c2	bellard
1385	158142c2	bellard	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1386	158142c2	bellard	{
1387	158142c2	bellard	flag aSign;
1388	158142c2	bellard	int16 aExp, shiftCount;
1389	bb98fe42	Andreas Färber	uint32_t aSig;
1390	158142c2	bellard	int32 z;
1391	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1392	158142c2	bellard
1393	158142c2	bellard	aSig = extractFloat32Frac( a );
1394	158142c2	bellard	aExp = extractFloat32Exp( a );
1395	158142c2	bellard	aSign = extractFloat32Sign( a );
1396	158142c2	bellard	shiftCount = aExp - 0x9E;
1397	158142c2	bellard	if ( 0 <= shiftCount ) {
1398	f090c9d4	pbrook	if ( float32_val(a) != 0xCF000000 ) {
1399	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1400	158142c2	bellard	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1401	158142c2	bellard	}
1402	bb98fe42	Andreas Färber	return (int32_t) 0x80000000;
1403	158142c2	bellard	}
1404	158142c2	bellard	else if ( aExp <= 0x7E ) {
1405	158142c2	bellard	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
1406	158142c2	bellard	return 0;
1407	158142c2	bellard	}
1408	158142c2	bellard	aSig = ( aSig \| 0x00800000 )<<8;
1409	158142c2	bellard	z = aSig>>( - shiftCount );
1410	bb98fe42	Andreas Färber	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1411	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
1412	158142c2	bellard	}
1413	158142c2	bellard	if ( aSign ) z = - z;
1414	158142c2	bellard	return z;
1415	158142c2	bellard
1416	158142c2	bellard	}
1417	158142c2	bellard
1418	158142c2	bellard	/*----------------------------------------------------------------------------
1419	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1420	cbcef455	Peter Maydell	\| `a' to the 16-bit two's complement integer format. The conversion is
1421	cbcef455	Peter Maydell	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1422	cbcef455	Peter Maydell	\| Arithmetic, except that the conversion is always rounded toward zero.
1423	cbcef455	Peter Maydell	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1424	cbcef455	Peter Maydell	\| the conversion overflows, the largest integer with the same sign as `a' is
1425	cbcef455	Peter Maydell	\| returned.
1426	cbcef455	Peter Maydell	----------------------------------------------------------------------------/
1427	cbcef455	Peter Maydell
1428	cbcef455	Peter Maydell	int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
1429	cbcef455	Peter Maydell	{
1430	cbcef455	Peter Maydell	flag aSign;
1431	cbcef455	Peter Maydell	int16 aExp, shiftCount;
1432	bb98fe42	Andreas Färber	uint32_t aSig;
1433	cbcef455	Peter Maydell	int32 z;
1434	cbcef455	Peter Maydell
1435	cbcef455	Peter Maydell	aSig = extractFloat32Frac( a );
1436	cbcef455	Peter Maydell	aExp = extractFloat32Exp( a );
1437	cbcef455	Peter Maydell	aSign = extractFloat32Sign( a );
1438	cbcef455	Peter Maydell	shiftCount = aExp - 0x8E;
1439	cbcef455	Peter Maydell	if ( 0 <= shiftCount ) {
1440	cbcef455	Peter Maydell	if ( float32_val(a) != 0xC7000000 ) {
1441	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
1442	cbcef455	Peter Maydell	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
1443	cbcef455	Peter Maydell	return 0x7FFF;
1444	cbcef455	Peter Maydell	}
1445	cbcef455	Peter Maydell	}
1446	bb98fe42	Andreas Färber	return (int32_t) 0xffff8000;
1447	cbcef455	Peter Maydell	}
1448	cbcef455	Peter Maydell	else if ( aExp <= 0x7E ) {
1449	cbcef455	Peter Maydell	if ( aExp \| aSig ) {
1450	cbcef455	Peter Maydell	STATUS(float_exception_flags) \|= float_flag_inexact;
1451	cbcef455	Peter Maydell	}
1452	cbcef455	Peter Maydell	return 0;
1453	cbcef455	Peter Maydell	}
1454	cbcef455	Peter Maydell	shiftCount -= 0x10;
1455	cbcef455	Peter Maydell	aSig = ( aSig \| 0x00800000 )<<8;
1456	cbcef455	Peter Maydell	z = aSig>>( - shiftCount );
1457	bb98fe42	Andreas Färber	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1458	cbcef455	Peter Maydell	STATUS(float_exception_flags) \|= float_flag_inexact;
1459	cbcef455	Peter Maydell	}
1460	cbcef455	Peter Maydell	if ( aSign ) {
1461	cbcef455	Peter Maydell	z = - z;
1462	cbcef455	Peter Maydell	}
1463	cbcef455	Peter Maydell	return z;
1464	cbcef455	Peter Maydell
1465	cbcef455	Peter Maydell	}
1466	cbcef455	Peter Maydell
1467	cbcef455	Peter Maydell	/*----------------------------------------------------------------------------
1468	cbcef455	Peter Maydell	\| Returns the result of converting the single-precision floating-point value
1469	158142c2	bellard	\| `a' to the 64-bit two's complement integer format. The conversion is
1470	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1471	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
1472	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
1473	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
1474	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
1475	158142c2	bellard	----------------------------------------------------------------------------/
1476	158142c2	bellard
1477	158142c2	bellard	int64 float32_to_int64( float32 a STATUS_PARAM )
1478	158142c2	bellard	{
1479	158142c2	bellard	flag aSign;
1480	158142c2	bellard	int16 aExp, shiftCount;
1481	bb98fe42	Andreas Färber	uint32_t aSig;
1482	bb98fe42	Andreas Färber	uint64_t aSig64, aSigExtra;
1483	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1484	158142c2	bellard
1485	158142c2	bellard	aSig = extractFloat32Frac( a );
1486	158142c2	bellard	aExp = extractFloat32Exp( a );
1487	158142c2	bellard	aSign = extractFloat32Sign( a );
1488	158142c2	bellard	shiftCount = 0xBE - aExp;
1489	158142c2	bellard	if ( shiftCount < 0 ) {
1490	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1491	158142c2	bellard	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
1492	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
1493	158142c2	bellard	}
1494	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
1495	158142c2	bellard	}
1496	158142c2	bellard	if ( aExp ) aSig \|= 0x00800000;
1497	158142c2	bellard	aSig64 = aSig;
1498	158142c2	bellard	aSig64 <<= 40;
1499	158142c2	bellard	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1500	158142c2	bellard	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1501	158142c2	bellard
1502	158142c2	bellard	}
1503	158142c2	bellard
1504	158142c2	bellard	/*----------------------------------------------------------------------------
1505	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1506	158142c2	bellard	\| `a' to the 64-bit two's complement integer format. The conversion is
1507	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1508	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero. If
1509	158142c2	bellard	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1510	158142c2	bellard	\| conversion overflows, the largest integer with the same sign as `a' is
1511	158142c2	bellard	\| returned.
1512	158142c2	bellard	----------------------------------------------------------------------------/
1513	158142c2	bellard
1514	158142c2	bellard	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1515	158142c2	bellard	{
1516	158142c2	bellard	flag aSign;
1517	158142c2	bellard	int16 aExp, shiftCount;
1518	bb98fe42	Andreas Färber	uint32_t aSig;
1519	bb98fe42	Andreas Färber	uint64_t aSig64;
1520	158142c2	bellard	int64 z;
1521	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1522	158142c2	bellard
1523	158142c2	bellard	aSig = extractFloat32Frac( a );
1524	158142c2	bellard	aExp = extractFloat32Exp( a );
1525	158142c2	bellard	aSign = extractFloat32Sign( a );
1526	158142c2	bellard	shiftCount = aExp - 0xBE;
1527	158142c2	bellard	if ( 0 <= shiftCount ) {
1528	f090c9d4	pbrook	if ( float32_val(a) != 0xDF000000 ) {
1529	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1530	158142c2	bellard	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
1531	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
1532	158142c2	bellard	}
1533	158142c2	bellard	}
1534	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
1535	158142c2	bellard	}
1536	158142c2	bellard	else if ( aExp <= 0x7E ) {
1537	158142c2	bellard	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
1538	158142c2	bellard	return 0;
1539	158142c2	bellard	}
1540	158142c2	bellard	aSig64 = aSig \| 0x00800000;
1541	158142c2	bellard	aSig64 <<= 40;
1542	158142c2	bellard	z = aSig64>>( - shiftCount );
1543	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1544	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
1545	158142c2	bellard	}
1546	158142c2	bellard	if ( aSign ) z = - z;
1547	158142c2	bellard	return z;
1548	158142c2	bellard
1549	158142c2	bellard	}
1550	158142c2	bellard
1551	158142c2	bellard	/*----------------------------------------------------------------------------
1552	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1553	158142c2	bellard	\| `a' to the double-precision floating-point format. The conversion is
1554	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1555	158142c2	bellard	\| Arithmetic.
1556	158142c2	bellard	----------------------------------------------------------------------------/
1557	158142c2	bellard
1558	158142c2	bellard	float64 float32_to_float64( float32 a STATUS_PARAM )
1559	158142c2	bellard	{
1560	158142c2	bellard	flag aSign;
1561	158142c2	bellard	int16 aExp;
1562	bb98fe42	Andreas Färber	uint32_t aSig;
1563	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1564	158142c2	bellard
1565	158142c2	bellard	aSig = extractFloat32Frac( a );
1566	158142c2	bellard	aExp = extractFloat32Exp( a );
1567	158142c2	bellard	aSign = extractFloat32Sign( a );
1568	158142c2	bellard	if ( aExp == 0xFF ) {
1569	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1570	158142c2	bellard	return packFloat64( aSign, 0x7FF, 0 );
1571	158142c2	bellard	}
1572	158142c2	bellard	if ( aExp == 0 ) {
1573	158142c2	bellard	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1574	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1575	158142c2	bellard	--aExp;
1576	158142c2	bellard	}
1577	bb98fe42	Andreas Färber	return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1578	158142c2	bellard
1579	158142c2	bellard	}
1580	158142c2	bellard
1581	158142c2	bellard	#ifdef FLOATX80
1582	158142c2	bellard
1583	158142c2	bellard	/*----------------------------------------------------------------------------
1584	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1585	158142c2	bellard	\| `a' to the extended double-precision floating-point format. The conversion
1586	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1587	158142c2	bellard	\| Arithmetic.
1588	158142c2	bellard	----------------------------------------------------------------------------/
1589	158142c2	bellard
1590	158142c2	bellard	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1591	158142c2	bellard	{
1592	158142c2	bellard	flag aSign;
1593	158142c2	bellard	int16 aExp;
1594	bb98fe42	Andreas Färber	uint32_t aSig;
1595	158142c2	bellard
1596	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1597	158142c2	bellard	aSig = extractFloat32Frac( a );
1598	158142c2	bellard	aExp = extractFloat32Exp( a );
1599	158142c2	bellard	aSign = extractFloat32Sign( a );
1600	158142c2	bellard	if ( aExp == 0xFF ) {
1601	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1602	158142c2	bellard	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1603	158142c2	bellard	}
1604	158142c2	bellard	if ( aExp == 0 ) {
1605	158142c2	bellard	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1606	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1607	158142c2	bellard	}
1608	158142c2	bellard	aSig \|= 0x00800000;
1609	bb98fe42	Andreas Färber	return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1610	158142c2	bellard
1611	158142c2	bellard	}
1612	158142c2	bellard
1613	158142c2	bellard	#endif
1614	158142c2	bellard
1615	158142c2	bellard	#ifdef FLOAT128
1616	158142c2	bellard
1617	158142c2	bellard	/*----------------------------------------------------------------------------
1618	158142c2	bellard	\| Returns the result of converting the single-precision floating-point value
1619	158142c2	bellard	\| `a' to the quadruple-precision floating-point format. The conversion is
1620	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
1621	158142c2	bellard	\| Arithmetic.
1622	158142c2	bellard	----------------------------------------------------------------------------/
1623	158142c2	bellard
1624	158142c2	bellard	float128 float32_to_float128( float32 a STATUS_PARAM )
1625	158142c2	bellard	{
1626	158142c2	bellard	flag aSign;
1627	158142c2	bellard	int16 aExp;
1628	bb98fe42	Andreas Färber	uint32_t aSig;
1629	158142c2	bellard
1630	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1631	158142c2	bellard	aSig = extractFloat32Frac( a );
1632	158142c2	bellard	aExp = extractFloat32Exp( a );
1633	158142c2	bellard	aSign = extractFloat32Sign( a );
1634	158142c2	bellard	if ( aExp == 0xFF ) {
1635	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1636	158142c2	bellard	return packFloat128( aSign, 0x7FFF, 0, 0 );
1637	158142c2	bellard	}
1638	158142c2	bellard	if ( aExp == 0 ) {
1639	158142c2	bellard	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1640	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1641	158142c2	bellard	--aExp;
1642	158142c2	bellard	}
1643	bb98fe42	Andreas Färber	return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1644	158142c2	bellard
1645	158142c2	bellard	}
1646	158142c2	bellard
1647	158142c2	bellard	#endif
1648	158142c2	bellard
1649	158142c2	bellard	/*----------------------------------------------------------------------------
1650	158142c2	bellard	\| Rounds the single-precision floating-point value `a' to an integer, and
1651	158142c2	bellard	\| returns the result as a single-precision floating-point value. The
1652	158142c2	bellard	\| operation is performed according to the IEC/IEEE Standard for Binary
1653	158142c2	bellard	\| Floating-Point Arithmetic.
1654	158142c2	bellard	----------------------------------------------------------------------------/
1655	158142c2	bellard
1656	158142c2	bellard	float32 float32_round_to_int( float32 a STATUS_PARAM)
1657	158142c2	bellard	{
1658	158142c2	bellard	flag aSign;
1659	158142c2	bellard	int16 aExp;
1660	bb98fe42	Andreas Färber	uint32_t lastBitMask, roundBitsMask;
1661	158142c2	bellard	int8 roundingMode;
1662	bb98fe42	Andreas Färber	uint32_t z;
1663	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1664	158142c2	bellard
1665	158142c2	bellard	aExp = extractFloat32Exp( a );
1666	158142c2	bellard	if ( 0x96 <= aExp ) {
1667	158142c2	bellard	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1668	158142c2	bellard	return propagateFloat32NaN( a, a STATUS_VAR );
1669	158142c2	bellard	}
1670	158142c2	bellard	return a;
1671	158142c2	bellard	}
1672	158142c2	bellard	if ( aExp <= 0x7E ) {
1673	bb98fe42	Andreas Färber	if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1674	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
1675	158142c2	bellard	aSign = extractFloat32Sign( a );
1676	158142c2	bellard	switch ( STATUS(float_rounding_mode) ) {
1677	158142c2	bellard	case float_round_nearest_even:
1678	158142c2	bellard	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1679	158142c2	bellard	return packFloat32( aSign, 0x7F, 0 );
1680	158142c2	bellard	}
1681	158142c2	bellard	break;
1682	158142c2	bellard	case float_round_down:
1683	f090c9d4	pbrook	return make_float32(aSign ? 0xBF800000 : 0);
1684	158142c2	bellard	case float_round_up:
1685	f090c9d4	pbrook	return make_float32(aSign ? 0x80000000 : 0x3F800000);
1686	158142c2	bellard	}
1687	158142c2	bellard	return packFloat32( aSign, 0, 0 );
1688	158142c2	bellard	}
1689	158142c2	bellard	lastBitMask = 1;
1690	158142c2	bellard	lastBitMask <<= 0x96 - aExp;
1691	158142c2	bellard	roundBitsMask = lastBitMask - 1;
1692	f090c9d4	pbrook	z = float32_val(a);
1693	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
1694	158142c2	bellard	if ( roundingMode == float_round_nearest_even ) {
1695	158142c2	bellard	z += lastBitMask>>1;
1696	158142c2	bellard	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1697	158142c2	bellard	}
1698	158142c2	bellard	else if ( roundingMode != float_round_to_zero ) {
1699	f090c9d4	pbrook	if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1700	158142c2	bellard	z += roundBitsMask;
1701	158142c2	bellard	}
1702	158142c2	bellard	}
1703	158142c2	bellard	z &= ~ roundBitsMask;
1704	f090c9d4	pbrook	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
1705	f090c9d4	pbrook	return make_float32(z);
1706	158142c2	bellard
1707	158142c2	bellard	}
1708	158142c2	bellard
1709	158142c2	bellard	/*----------------------------------------------------------------------------
1710	158142c2	bellard	\| Returns the result of adding the absolute values of the single-precision
1711	158142c2	bellard	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1712	158142c2	bellard	\| before being returned. `zSign' is ignored if the result is a NaN.
1713	158142c2	bellard	\| The addition is performed according to the IEC/IEEE Standard for Binary
1714	158142c2	bellard	\| Floating-Point Arithmetic.
1715	158142c2	bellard	----------------------------------------------------------------------------/
1716	158142c2	bellard
1717	158142c2	bellard	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1718	158142c2	bellard	{
1719	158142c2	bellard	int16 aExp, bExp, zExp;
1720	bb98fe42	Andreas Färber	uint32_t aSig, bSig, zSig;
1721	158142c2	bellard	int16 expDiff;
1722	158142c2	bellard
1723	158142c2	bellard	aSig = extractFloat32Frac( a );
1724	158142c2	bellard	aExp = extractFloat32Exp( a );
1725	158142c2	bellard	bSig = extractFloat32Frac( b );
1726	158142c2	bellard	bExp = extractFloat32Exp( b );
1727	158142c2	bellard	expDiff = aExp - bExp;
1728	158142c2	bellard	aSig <<= 6;
1729	158142c2	bellard	bSig <<= 6;
1730	158142c2	bellard	if ( 0 < expDiff ) {
1731	158142c2	bellard	if ( aExp == 0xFF ) {
1732	158142c2	bellard	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1733	158142c2	bellard	return a;
1734	158142c2	bellard	}
1735	158142c2	bellard	if ( bExp == 0 ) {
1736	158142c2	bellard	--expDiff;
1737	158142c2	bellard	}
1738	158142c2	bellard	else {
1739	158142c2	bellard	bSig \|= 0x20000000;
1740	158142c2	bellard	}
1741	158142c2	bellard	shift32RightJamming( bSig, expDiff, &bSig );
1742	158142c2	bellard	zExp = aExp;
1743	158142c2	bellard	}
1744	158142c2	bellard	else if ( expDiff < 0 ) {
1745	158142c2	bellard	if ( bExp == 0xFF ) {
1746	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1747	158142c2	bellard	return packFloat32( zSign, 0xFF, 0 );
1748	158142c2	bellard	}
1749	158142c2	bellard	if ( aExp == 0 ) {
1750	158142c2	bellard	++expDiff;
1751	158142c2	bellard	}
1752	158142c2	bellard	else {
1753	158142c2	bellard	aSig \|= 0x20000000;
1754	158142c2	bellard	}
1755	158142c2	bellard	shift32RightJamming( aSig, - expDiff, &aSig );
1756	158142c2	bellard	zExp = bExp;
1757	158142c2	bellard	}
1758	158142c2	bellard	else {
1759	158142c2	bellard	if ( aExp == 0xFF ) {
1760	158142c2	bellard	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1761	158142c2	bellard	return a;
1762	158142c2	bellard	}
1763	fe76d976	pbrook	if ( aExp == 0 ) {
1764	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
1765	fe76d976	pbrook	return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1766	fe76d976	pbrook	}
1767	158142c2	bellard	zSig = 0x40000000 + aSig + bSig;
1768	158142c2	bellard	zExp = aExp;
1769	158142c2	bellard	goto roundAndPack;
1770	158142c2	bellard	}
1771	158142c2	bellard	aSig \|= 0x20000000;
1772	158142c2	bellard	zSig = ( aSig + bSig )<<1;
1773	158142c2	bellard	--zExp;
1774	bb98fe42	Andreas Färber	if ( (int32_t) zSig < 0 ) {
1775	158142c2	bellard	zSig = aSig + bSig;
1776	158142c2	bellard	++zExp;
1777	158142c2	bellard	}
1778	158142c2	bellard	roundAndPack:
1779	158142c2	bellard	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1780	158142c2	bellard
1781	158142c2	bellard	}
1782	158142c2	bellard
1783	158142c2	bellard	/*----------------------------------------------------------------------------
1784	158142c2	bellard	\| Returns the result of subtracting the absolute values of the single-
1785	158142c2	bellard	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
1786	158142c2	bellard	\| difference is negated before being returned. `zSign' is ignored if the
1787	158142c2	bellard	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
1788	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
1789	158142c2	bellard	----------------------------------------------------------------------------/
1790	158142c2	bellard
1791	158142c2	bellard	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1792	158142c2	bellard	{
1793	158142c2	bellard	int16 aExp, bExp, zExp;
1794	bb98fe42	Andreas Färber	uint32_t aSig, bSig, zSig;
1795	158142c2	bellard	int16 expDiff;
1796	158142c2	bellard
1797	158142c2	bellard	aSig = extractFloat32Frac( a );
1798	158142c2	bellard	aExp = extractFloat32Exp( a );
1799	158142c2	bellard	bSig = extractFloat32Frac( b );
1800	158142c2	bellard	bExp = extractFloat32Exp( b );
1801	158142c2	bellard	expDiff = aExp - bExp;
1802	158142c2	bellard	aSig <<= 7;
1803	158142c2	bellard	bSig <<= 7;
1804	158142c2	bellard	if ( 0 < expDiff ) goto aExpBigger;
1805	158142c2	bellard	if ( expDiff < 0 ) goto bExpBigger;
1806	158142c2	bellard	if ( aExp == 0xFF ) {
1807	158142c2	bellard	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1808	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1809	158142c2	bellard	return float32_default_nan;
1810	158142c2	bellard	}
1811	158142c2	bellard	if ( aExp == 0 ) {
1812	158142c2	bellard	aExp = 1;
1813	158142c2	bellard	bExp = 1;
1814	158142c2	bellard	}
1815	158142c2	bellard	if ( bSig < aSig ) goto aBigger;
1816	158142c2	bellard	if ( aSig < bSig ) goto bBigger;
1817	158142c2	bellard	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1818	158142c2	bellard	bExpBigger:
1819	158142c2	bellard	if ( bExp == 0xFF ) {
1820	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1821	158142c2	bellard	return packFloat32( zSign ^ 1, 0xFF, 0 );
1822	158142c2	bellard	}
1823	158142c2	bellard	if ( aExp == 0 ) {
1824	158142c2	bellard	++expDiff;
1825	158142c2	bellard	}
1826	158142c2	bellard	else {
1827	158142c2	bellard	aSig \|= 0x40000000;
1828	158142c2	bellard	}
1829	158142c2	bellard	shift32RightJamming( aSig, - expDiff, &aSig );
1830	158142c2	bellard	bSig \|= 0x40000000;
1831	158142c2	bellard	bBigger:
1832	158142c2	bellard	zSig = bSig - aSig;
1833	158142c2	bellard	zExp = bExp;
1834	158142c2	bellard	zSign ^= 1;
1835	158142c2	bellard	goto normalizeRoundAndPack;
1836	158142c2	bellard	aExpBigger:
1837	158142c2	bellard	if ( aExp == 0xFF ) {
1838	158142c2	bellard	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1839	158142c2	bellard	return a;
1840	158142c2	bellard	}
1841	158142c2	bellard	if ( bExp == 0 ) {
1842	158142c2	bellard	--expDiff;
1843	158142c2	bellard	}
1844	158142c2	bellard	else {
1845	158142c2	bellard	bSig \|= 0x40000000;
1846	158142c2	bellard	}
1847	158142c2	bellard	shift32RightJamming( bSig, expDiff, &bSig );
1848	158142c2	bellard	aSig \|= 0x40000000;
1849	158142c2	bellard	aBigger:
1850	158142c2	bellard	zSig = aSig - bSig;
1851	158142c2	bellard	zExp = aExp;
1852	158142c2	bellard	normalizeRoundAndPack:
1853	158142c2	bellard	--zExp;
1854	158142c2	bellard	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1855	158142c2	bellard
1856	158142c2	bellard	}
1857	158142c2	bellard
1858	158142c2	bellard	/*----------------------------------------------------------------------------
1859	158142c2	bellard	\| Returns the result of adding the single-precision floating-point values `a'
1860	158142c2	bellard	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
1861	158142c2	bellard	\| Binary Floating-Point Arithmetic.
1862	158142c2	bellard	----------------------------------------------------------------------------/
1863	158142c2	bellard
1864	158142c2	bellard	float32 float32_add( float32 a, float32 b STATUS_PARAM )
1865	158142c2	bellard	{
1866	158142c2	bellard	flag aSign, bSign;
1867	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1868	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
1869	158142c2	bellard
1870	158142c2	bellard	aSign = extractFloat32Sign( a );
1871	158142c2	bellard	bSign = extractFloat32Sign( b );
1872	158142c2	bellard	if ( aSign == bSign ) {
1873	158142c2	bellard	return addFloat32Sigs( a, b, aSign STATUS_VAR);
1874	158142c2	bellard	}
1875	158142c2	bellard	else {
1876	158142c2	bellard	return subFloat32Sigs( a, b, aSign STATUS_VAR );
1877	158142c2	bellard	}
1878	158142c2	bellard
1879	158142c2	bellard	}
1880	158142c2	bellard
1881	158142c2	bellard	/*----------------------------------------------------------------------------
1882	158142c2	bellard	\| Returns the result of subtracting the single-precision floating-point values
1883	158142c2	bellard	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1884	158142c2	bellard	\| for Binary Floating-Point Arithmetic.
1885	158142c2	bellard	----------------------------------------------------------------------------/
1886	158142c2	bellard
1887	158142c2	bellard	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1888	158142c2	bellard	{
1889	158142c2	bellard	flag aSign, bSign;
1890	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1891	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
1892	158142c2	bellard
1893	158142c2	bellard	aSign = extractFloat32Sign( a );
1894	158142c2	bellard	bSign = extractFloat32Sign( b );
1895	158142c2	bellard	if ( aSign == bSign ) {
1896	158142c2	bellard	return subFloat32Sigs( a, b, aSign STATUS_VAR );
1897	158142c2	bellard	}
1898	158142c2	bellard	else {
1899	158142c2	bellard	return addFloat32Sigs( a, b, aSign STATUS_VAR );
1900	158142c2	bellard	}
1901	158142c2	bellard
1902	158142c2	bellard	}
1903	158142c2	bellard
1904	158142c2	bellard	/*----------------------------------------------------------------------------
1905	158142c2	bellard	\| Returns the result of multiplying the single-precision floating-point values
1906	158142c2	bellard	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1907	158142c2	bellard	\| for Binary Floating-Point Arithmetic.
1908	158142c2	bellard	----------------------------------------------------------------------------/
1909	158142c2	bellard
1910	158142c2	bellard	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1911	158142c2	bellard	{
1912	158142c2	bellard	flag aSign, bSign, zSign;
1913	158142c2	bellard	int16 aExp, bExp, zExp;
1914	bb98fe42	Andreas Färber	uint32_t aSig, bSig;
1915	bb98fe42	Andreas Färber	uint64_t zSig64;
1916	bb98fe42	Andreas Färber	uint32_t zSig;
1917	158142c2	bellard
1918	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1919	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
1920	37d18660	Peter Maydell
1921	158142c2	bellard	aSig = extractFloat32Frac( a );
1922	158142c2	bellard	aExp = extractFloat32Exp( a );
1923	158142c2	bellard	aSign = extractFloat32Sign( a );
1924	158142c2	bellard	bSig = extractFloat32Frac( b );
1925	158142c2	bellard	bExp = extractFloat32Exp( b );
1926	158142c2	bellard	bSign = extractFloat32Sign( b );
1927	158142c2	bellard	zSign = aSign ^ bSign;
1928	158142c2	bellard	if ( aExp == 0xFF ) {
1929	158142c2	bellard	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
1930	158142c2	bellard	return propagateFloat32NaN( a, b STATUS_VAR );
1931	158142c2	bellard	}
1932	158142c2	bellard	if ( ( bExp \| bSig ) == 0 ) {
1933	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1934	158142c2	bellard	return float32_default_nan;
1935	158142c2	bellard	}
1936	158142c2	bellard	return packFloat32( zSign, 0xFF, 0 );
1937	158142c2	bellard	}
1938	158142c2	bellard	if ( bExp == 0xFF ) {
1939	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1940	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
1941	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1942	158142c2	bellard	return float32_default_nan;
1943	158142c2	bellard	}
1944	158142c2	bellard	return packFloat32( zSign, 0xFF, 0 );
1945	158142c2	bellard	}
1946	158142c2	bellard	if ( aExp == 0 ) {
1947	158142c2	bellard	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1948	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1949	158142c2	bellard	}
1950	158142c2	bellard	if ( bExp == 0 ) {
1951	158142c2	bellard	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1952	158142c2	bellard	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1953	158142c2	bellard	}
1954	158142c2	bellard	zExp = aExp + bExp - 0x7F;
1955	158142c2	bellard	aSig = ( aSig \| 0x00800000 )<<7;
1956	158142c2	bellard	bSig = ( bSig \| 0x00800000 )<<8;
1957	bb98fe42	Andreas Färber	shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
1958	158142c2	bellard	zSig = zSig64;
1959	bb98fe42	Andreas Färber	if ( 0 <= (int32_t) ( zSig<<1 ) ) {
1960	158142c2	bellard	zSig <<= 1;
1961	158142c2	bellard	--zExp;
1962	158142c2	bellard	}
1963	158142c2	bellard	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1964	158142c2	bellard
1965	158142c2	bellard	}
1966	158142c2	bellard
1967	158142c2	bellard	/*----------------------------------------------------------------------------
1968	158142c2	bellard	\| Returns the result of dividing the single-precision floating-point value `a'
1969	158142c2	bellard	\| by the corresponding value `b'. The operation is performed according to the
1970	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1971	158142c2	bellard	----------------------------------------------------------------------------/
1972	158142c2	bellard
1973	158142c2	bellard	float32 float32_div( float32 a, float32 b STATUS_PARAM )
1974	158142c2	bellard	{
1975	158142c2	bellard	flag aSign, bSign, zSign;
1976	158142c2	bellard	int16 aExp, bExp, zExp;
1977	bb98fe42	Andreas Färber	uint32_t aSig, bSig, zSig;
1978	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
1979	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
1980	158142c2	bellard
1981	158142c2	bellard	aSig = extractFloat32Frac( a );
1982	158142c2	bellard	aExp = extractFloat32Exp( a );
1983	158142c2	bellard	aSign = extractFloat32Sign( a );
1984	158142c2	bellard	bSig = extractFloat32Frac( b );
1985	158142c2	bellard	bExp = extractFloat32Exp( b );
1986	158142c2	bellard	bSign = extractFloat32Sign( b );
1987	158142c2	bellard	zSign = aSign ^ bSign;
1988	158142c2	bellard	if ( aExp == 0xFF ) {
1989	158142c2	bellard	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1990	158142c2	bellard	if ( bExp == 0xFF ) {
1991	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1992	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
1993	158142c2	bellard	return float32_default_nan;
1994	158142c2	bellard	}
1995	158142c2	bellard	return packFloat32( zSign, 0xFF, 0 );
1996	158142c2	bellard	}
1997	158142c2	bellard	if ( bExp == 0xFF ) {
1998	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1999	158142c2	bellard	return packFloat32( zSign, 0, 0 );
2000	158142c2	bellard	}
2001	158142c2	bellard	if ( bExp == 0 ) {
2002	158142c2	bellard	if ( bSig == 0 ) {
2003	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
2004	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2005	158142c2	bellard	return float32_default_nan;
2006	158142c2	bellard	}
2007	158142c2	bellard	float_raise( float_flag_divbyzero STATUS_VAR);
2008	158142c2	bellard	return packFloat32( zSign, 0xFF, 0 );
2009	158142c2	bellard	}
2010	158142c2	bellard	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2011	158142c2	bellard	}
2012	158142c2	bellard	if ( aExp == 0 ) {
2013	158142c2	bellard	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2014	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2015	158142c2	bellard	}
2016	158142c2	bellard	zExp = aExp - bExp + 0x7D;
2017	158142c2	bellard	aSig = ( aSig \| 0x00800000 )<<7;
2018	158142c2	bellard	bSig = ( bSig \| 0x00800000 )<<8;
2019	158142c2	bellard	if ( bSig <= ( aSig + aSig ) ) {
2020	158142c2	bellard	aSig >>= 1;
2021	158142c2	bellard	++zExp;
2022	158142c2	bellard	}
2023	bb98fe42	Andreas Färber	zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2024	158142c2	bellard	if ( ( zSig & 0x3F ) == 0 ) {
2025	bb98fe42	Andreas Färber	zSig \|= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2026	158142c2	bellard	}
2027	158142c2	bellard	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2028	158142c2	bellard
2029	158142c2	bellard	}
2030	158142c2	bellard
2031	158142c2	bellard	/*----------------------------------------------------------------------------
2032	158142c2	bellard	\| Returns the remainder of the single-precision floating-point value `a'
2033	158142c2	bellard	\| with respect to the corresponding value `b'. The operation is performed
2034	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2035	158142c2	bellard	----------------------------------------------------------------------------/
2036	158142c2	bellard
2037	158142c2	bellard	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
2038	158142c2	bellard	{
2039	ed086f3d	Blue Swirl	flag aSign, zSign;
2040	158142c2	bellard	int16 aExp, bExp, expDiff;
2041	bb98fe42	Andreas Färber	uint32_t aSig, bSig;
2042	bb98fe42	Andreas Färber	uint32_t q;
2043	bb98fe42	Andreas Färber	uint64_t aSig64, bSig64, q64;
2044	bb98fe42	Andreas Färber	uint32_t alternateASig;
2045	bb98fe42	Andreas Färber	int32_t sigMean;
2046	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2047	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2048	158142c2	bellard
2049	158142c2	bellard	aSig = extractFloat32Frac( a );
2050	158142c2	bellard	aExp = extractFloat32Exp( a );
2051	158142c2	bellard	aSign = extractFloat32Sign( a );
2052	158142c2	bellard	bSig = extractFloat32Frac( b );
2053	158142c2	bellard	bExp = extractFloat32Exp( b );
2054	158142c2	bellard	if ( aExp == 0xFF ) {
2055	158142c2	bellard	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
2056	158142c2	bellard	return propagateFloat32NaN( a, b STATUS_VAR );
2057	158142c2	bellard	}
2058	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2059	158142c2	bellard	return float32_default_nan;
2060	158142c2	bellard	}
2061	158142c2	bellard	if ( bExp == 0xFF ) {
2062	158142c2	bellard	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2063	158142c2	bellard	return a;
2064	158142c2	bellard	}
2065	158142c2	bellard	if ( bExp == 0 ) {
2066	158142c2	bellard	if ( bSig == 0 ) {
2067	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2068	158142c2	bellard	return float32_default_nan;
2069	158142c2	bellard	}
2070	158142c2	bellard	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2071	158142c2	bellard	}
2072	158142c2	bellard	if ( aExp == 0 ) {
2073	158142c2	bellard	if ( aSig == 0 ) return a;
2074	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2075	158142c2	bellard	}
2076	158142c2	bellard	expDiff = aExp - bExp;
2077	158142c2	bellard	aSig \|= 0x00800000;
2078	158142c2	bellard	bSig \|= 0x00800000;
2079	158142c2	bellard	if ( expDiff < 32 ) {
2080	158142c2	bellard	aSig <<= 8;
2081	158142c2	bellard	bSig <<= 8;
2082	158142c2	bellard	if ( expDiff < 0 ) {
2083	158142c2	bellard	if ( expDiff < -1 ) return a;
2084	158142c2	bellard	aSig >>= 1;
2085	158142c2	bellard	}
2086	158142c2	bellard	q = ( bSig <= aSig );
2087	158142c2	bellard	if ( q ) aSig -= bSig;
2088	158142c2	bellard	if ( 0 < expDiff ) {
2089	bb98fe42	Andreas Färber	q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2090	158142c2	bellard	q >>= 32 - expDiff;
2091	158142c2	bellard	bSig >>= 2;
2092	158142c2	bellard	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2093	158142c2	bellard	}
2094	158142c2	bellard	else {
2095	158142c2	bellard	aSig >>= 2;
2096	158142c2	bellard	bSig >>= 2;
2097	158142c2	bellard	}
2098	158142c2	bellard	}
2099	158142c2	bellard	else {
2100	158142c2	bellard	if ( bSig <= aSig ) aSig -= bSig;
2101	bb98fe42	Andreas Färber	aSig64 = ( (uint64_t) aSig )<<40;
2102	bb98fe42	Andreas Färber	bSig64 = ( (uint64_t) bSig )<<40;
2103	158142c2	bellard	expDiff -= 64;
2104	158142c2	bellard	while ( 0 < expDiff ) {
2105	158142c2	bellard	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2106	158142c2	bellard	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2107	158142c2	bellard	aSig64 = - ( ( bSig * q64 )<<38 );
2108	158142c2	bellard	expDiff -= 62;
2109	158142c2	bellard	}
2110	158142c2	bellard	expDiff += 64;
2111	158142c2	bellard	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2112	158142c2	bellard	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2113	158142c2	bellard	q = q64>>( 64 - expDiff );
2114	158142c2	bellard	bSig <<= 6;
2115	158142c2	bellard	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2116	158142c2	bellard	}
2117	158142c2	bellard	do {
2118	158142c2	bellard	alternateASig = aSig;
2119	158142c2	bellard	++q;
2120	158142c2	bellard	aSig -= bSig;
2121	bb98fe42	Andreas Färber	} while ( 0 <= (int32_t) aSig );
2122	158142c2	bellard	sigMean = aSig + alternateASig;
2123	158142c2	bellard	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2124	158142c2	bellard	aSig = alternateASig;
2125	158142c2	bellard	}
2126	bb98fe42	Andreas Färber	zSign = ( (int32_t) aSig < 0 );
2127	158142c2	bellard	if ( zSign ) aSig = - aSig;
2128	158142c2	bellard	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2129	158142c2	bellard
2130	158142c2	bellard	}
2131	158142c2	bellard
2132	158142c2	bellard	/*----------------------------------------------------------------------------
2133	158142c2	bellard	\| Returns the square root of the single-precision floating-point value `a'.
2134	158142c2	bellard	\| The operation is performed according to the IEC/IEEE Standard for Binary
2135	158142c2	bellard	\| Floating-Point Arithmetic.
2136	158142c2	bellard	----------------------------------------------------------------------------/
2137	158142c2	bellard
2138	158142c2	bellard	float32 float32_sqrt( float32 a STATUS_PARAM )
2139	158142c2	bellard	{
2140	158142c2	bellard	flag aSign;
2141	158142c2	bellard	int16 aExp, zExp;
2142	bb98fe42	Andreas Färber	uint32_t aSig, zSig;
2143	bb98fe42	Andreas Färber	uint64_t rem, term;
2144	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2145	158142c2	bellard
2146	158142c2	bellard	aSig = extractFloat32Frac( a );
2147	158142c2	bellard	aExp = extractFloat32Exp( a );
2148	158142c2	bellard	aSign = extractFloat32Sign( a );
2149	158142c2	bellard	if ( aExp == 0xFF ) {
2150	f090c9d4	pbrook	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2151	158142c2	bellard	if ( ! aSign ) return a;
2152	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2153	158142c2	bellard	return float32_default_nan;
2154	158142c2	bellard	}
2155	158142c2	bellard	if ( aSign ) {
2156	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) return a;
2157	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2158	158142c2	bellard	return float32_default_nan;
2159	158142c2	bellard	}
2160	158142c2	bellard	if ( aExp == 0 ) {
2161	f090c9d4	pbrook	if ( aSig == 0 ) return float32_zero;
2162	158142c2	bellard	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2163	158142c2	bellard	}
2164	158142c2	bellard	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2165	158142c2	bellard	aSig = ( aSig \| 0x00800000 )<<8;
2166	158142c2	bellard	zSig = estimateSqrt32( aExp, aSig ) + 2;
2167	158142c2	bellard	if ( ( zSig & 0x7F ) <= 5 ) {
2168	158142c2	bellard	if ( zSig < 2 ) {
2169	158142c2	bellard	zSig = 0x7FFFFFFF;
2170	158142c2	bellard	goto roundAndPack;
2171	158142c2	bellard	}
2172	158142c2	bellard	aSig >>= aExp & 1;
2173	bb98fe42	Andreas Färber	term = ( (uint64_t) zSig ) * zSig;
2174	bb98fe42	Andreas Färber	rem = ( ( (uint64_t) aSig )<<32 ) - term;
2175	bb98fe42	Andreas Färber	while ( (int64_t) rem < 0 ) {
2176	158142c2	bellard	--zSig;
2177	bb98fe42	Andreas Färber	rem += ( ( (uint64_t) zSig )<<1 ) \| 1;
2178	158142c2	bellard	}
2179	158142c2	bellard	zSig \|= ( rem != 0 );
2180	158142c2	bellard	}
2181	158142c2	bellard	shift32RightJamming( zSig, 1, &zSig );
2182	158142c2	bellard	roundAndPack:
2183	158142c2	bellard	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2184	158142c2	bellard
2185	158142c2	bellard	}
2186	158142c2	bellard
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
2204	8229c991	Aurelien Jarno
2205	8229c991	Aurelien Jarno	static const float64 float32_exp2_coefficients[15] =
2206	8229c991	Aurelien Jarno	{
2207	d5138cf4	Peter Maydell	const_float64( 0x3ff0000000000000ll ), /* 1 */
2208	d5138cf4	Peter Maydell	const_float64( 0x3fe0000000000000ll ), /* 2 */
2209	d5138cf4	Peter Maydell	const_float64( 0x3fc5555555555555ll ), /* 3 */
2210	d5138cf4	Peter Maydell	const_float64( 0x3fa5555555555555ll ), /* 4 */
2211	d5138cf4	Peter Maydell	const_float64( 0x3f81111111111111ll ), /* 5 */
2212	d5138cf4	Peter Maydell	const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
2213	d5138cf4	Peter Maydell	const_float64( 0x3f2a01a01a01a01all ), /* 7 */
2214	d5138cf4	Peter Maydell	const_float64( 0x3efa01a01a01a01all ), /* 8 */
2215	d5138cf4	Peter Maydell	const_float64( 0x3ec71de3a556c734ll ), /* 9 */
2216	d5138cf4	Peter Maydell	const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2217	d5138cf4	Peter Maydell	const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2218	d5138cf4	Peter Maydell	const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2219	d5138cf4	Peter Maydell	const_float64( 0x3de6124613a86d09ll ), /* 13 */
2220	d5138cf4	Peter Maydell	const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2221	d5138cf4	Peter Maydell	const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2222	8229c991	Aurelien Jarno	};
2223	8229c991	Aurelien Jarno
2224	8229c991	Aurelien Jarno	float32 float32_exp2( float32 a STATUS_PARAM )
2225	8229c991	Aurelien Jarno	{
2226	8229c991	Aurelien Jarno	flag aSign;
2227	8229c991	Aurelien Jarno	int16 aExp;
2228	bb98fe42	Andreas Färber	uint32_t aSig;
2229	8229c991	Aurelien Jarno	float64 r, x, xn;
2230	8229c991	Aurelien Jarno	int i;
2231	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2232	8229c991	Aurelien Jarno
2233	8229c991	Aurelien Jarno	aSig = extractFloat32Frac( a );
2234	8229c991	Aurelien Jarno	aExp = extractFloat32Exp( a );
2235	8229c991	Aurelien Jarno	aSign = extractFloat32Sign( a );
2236	8229c991	Aurelien Jarno
2237	8229c991	Aurelien Jarno	if ( aExp == 0xFF) {
2238	8229c991	Aurelien Jarno	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2239	8229c991	Aurelien Jarno	return (aSign) ? float32_zero : a;
2240	8229c991	Aurelien Jarno	}
2241	8229c991	Aurelien Jarno	if (aExp == 0) {
2242	8229c991	Aurelien Jarno	if (aSig == 0) return float32_one;
2243	8229c991	Aurelien Jarno	}
2244	8229c991	Aurelien Jarno
2245	8229c991	Aurelien Jarno	float_raise( float_flag_inexact STATUS_VAR);
2246	8229c991	Aurelien Jarno
2247	8229c991	Aurelien Jarno	/* ******************************* */
2248	8229c991	Aurelien Jarno	/* using float64 for approximation */
2249	8229c991	Aurelien Jarno	/* ******************************* */
2250	8229c991	Aurelien Jarno	x = float32_to_float64(a STATUS_VAR);
2251	8229c991	Aurelien Jarno	x = float64_mul(x, float64_ln2 STATUS_VAR);
2252	8229c991	Aurelien Jarno
2253	8229c991	Aurelien Jarno	xn = x;
2254	8229c991	Aurelien Jarno	r = float64_one;
2255	8229c991	Aurelien Jarno	for (i = 0 ; i < 15 ; i++) {
2256	8229c991	Aurelien Jarno	float64 f;
2257	8229c991	Aurelien Jarno
2258	8229c991	Aurelien Jarno	f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2259	8229c991	Aurelien Jarno	r = float64_add(r, f STATUS_VAR);
2260	8229c991	Aurelien Jarno
2261	8229c991	Aurelien Jarno	xn = float64_mul(xn, x STATUS_VAR);
2262	8229c991	Aurelien Jarno	}
2263	8229c991	Aurelien Jarno
2264	8229c991	Aurelien Jarno	return float64_to_float32(r, status);
2265	8229c991	Aurelien Jarno	}
2266	8229c991	Aurelien Jarno
2267	8229c991	Aurelien Jarno	/*----------------------------------------------------------------------------
2268	374dfc33	aurel32	\| Returns the binary log of the single-precision floating-point value `a'.
2269	374dfc33	aurel32	\| The operation is performed according to the IEC/IEEE Standard for Binary
2270	374dfc33	aurel32	\| Floating-Point Arithmetic.
2271	374dfc33	aurel32	----------------------------------------------------------------------------/
2272	374dfc33	aurel32	float32 float32_log2( float32 a STATUS_PARAM )
2273	374dfc33	aurel32	{
2274	374dfc33	aurel32	flag aSign, zSign;
2275	374dfc33	aurel32	int16 aExp;
2276	bb98fe42	Andreas Färber	uint32_t aSig, zSig, i;
2277	374dfc33	aurel32
2278	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2279	374dfc33	aurel32	aSig = extractFloat32Frac( a );
2280	374dfc33	aurel32	aExp = extractFloat32Exp( a );
2281	374dfc33	aurel32	aSign = extractFloat32Sign( a );
2282	374dfc33	aurel32
2283	374dfc33	aurel32	if ( aExp == 0 ) {
2284	374dfc33	aurel32	if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2285	374dfc33	aurel32	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2286	374dfc33	aurel32	}
2287	374dfc33	aurel32	if ( aSign ) {
2288	374dfc33	aurel32	float_raise( float_flag_invalid STATUS_VAR);
2289	374dfc33	aurel32	return float32_default_nan;
2290	374dfc33	aurel32	}
2291	374dfc33	aurel32	if ( aExp == 0xFF ) {
2292	374dfc33	aurel32	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2293	374dfc33	aurel32	return a;
2294	374dfc33	aurel32	}
2295	374dfc33	aurel32
2296	374dfc33	aurel32	aExp -= 0x7F;
2297	374dfc33	aurel32	aSig \|= 0x00800000;
2298	374dfc33	aurel32	zSign = aExp < 0;
2299	374dfc33	aurel32	zSig = aExp << 23;
2300	374dfc33	aurel32
2301	374dfc33	aurel32	for (i = 1 << 22; i > 0; i >>= 1) {
2302	bb98fe42	Andreas Färber	aSig = ( (uint64_t)aSig * aSig ) >> 23;
2303	374dfc33	aurel32	if ( aSig & 0x01000000 ) {
2304	374dfc33	aurel32	aSig >>= 1;
2305	374dfc33	aurel32	zSig \|= i;
2306	374dfc33	aurel32	}
2307	374dfc33	aurel32	}
2308	374dfc33	aurel32
2309	374dfc33	aurel32	if ( zSign )
2310	374dfc33	aurel32	zSig = -zSig;
2311	374dfc33	aurel32
2312	374dfc33	aurel32	return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2313	374dfc33	aurel32	}
2314	374dfc33	aurel32
2315	374dfc33	aurel32	/*----------------------------------------------------------------------------
2316	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is equal to
2317	b689362d	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. The invalid exception is
2318	b689362d	Aurelien Jarno	\| raised if either operand is a NaN. Otherwise, the comparison is performed
2319	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2320	158142c2	bellard	----------------------------------------------------------------------------/
2321	158142c2	bellard
2322	b689362d	Aurelien Jarno	int float32_eq( float32 a, float32 b STATUS_PARAM )
2323	158142c2	bellard	{
2324	b689362d	Aurelien Jarno	uint32_t av, bv;
2325	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2326	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2327	158142c2	bellard
2328	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2329	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2330	158142c2	bellard	) {
2331	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
2332	158142c2	bellard	return 0;
2333	158142c2	bellard	}
2334	b689362d	Aurelien Jarno	av = float32_val(a);
2335	b689362d	Aurelien Jarno	bv = float32_val(b);
2336	b689362d	Aurelien Jarno	return ( av == bv ) \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
2337	158142c2	bellard	}
2338	158142c2	bellard
2339	158142c2	bellard	/*----------------------------------------------------------------------------
2340	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is less than
2341	f5a64251	Aurelien Jarno	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
2342	f5a64251	Aurelien Jarno	\| exception is raised if either operand is a NaN. The comparison is performed
2343	f5a64251	Aurelien Jarno	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2344	158142c2	bellard	----------------------------------------------------------------------------/
2345	158142c2	bellard
2346	750afe93	bellard	int float32_le( float32 a, float32 b STATUS_PARAM )
2347	158142c2	bellard	{
2348	158142c2	bellard	flag aSign, bSign;
2349	bb98fe42	Andreas Färber	uint32_t av, bv;
2350	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2351	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2352	158142c2	bellard
2353	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2354	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2355	158142c2	bellard	) {
2356	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2357	158142c2	bellard	return 0;
2358	158142c2	bellard	}
2359	158142c2	bellard	aSign = extractFloat32Sign( a );
2360	158142c2	bellard	bSign = extractFloat32Sign( b );
2361	f090c9d4	pbrook	av = float32_val(a);
2362	f090c9d4	pbrook	bv = float32_val(b);
2363	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
2364	f090c9d4	pbrook	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
2365	158142c2	bellard
2366	158142c2	bellard	}
2367	158142c2	bellard
2368	158142c2	bellard	/*----------------------------------------------------------------------------
2369	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is less than
2370	f5a64251	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. The invalid exception is
2371	f5a64251	Aurelien Jarno	\| raised if either operand is a NaN. The comparison is performed according
2372	f5a64251	Aurelien Jarno	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2373	158142c2	bellard	----------------------------------------------------------------------------/
2374	158142c2	bellard
2375	750afe93	bellard	int float32_lt( float32 a, float32 b STATUS_PARAM )
2376	158142c2	bellard	{
2377	158142c2	bellard	flag aSign, bSign;
2378	bb98fe42	Andreas Färber	uint32_t av, bv;
2379	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2380	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2381	158142c2	bellard
2382	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2383	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2384	158142c2	bellard	) {
2385	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2386	158142c2	bellard	return 0;
2387	158142c2	bellard	}
2388	158142c2	bellard	aSign = extractFloat32Sign( a );
2389	158142c2	bellard	bSign = extractFloat32Sign( b );
2390	f090c9d4	pbrook	av = float32_val(a);
2391	f090c9d4	pbrook	bv = float32_val(b);
2392	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
2393	f090c9d4	pbrook	return ( av != bv ) && ( aSign ^ ( av < bv ) );
2394	158142c2	bellard
2395	158142c2	bellard	}
2396	158142c2	bellard
2397	158142c2	bellard	/*----------------------------------------------------------------------------
2398	67b7861d	Aurelien Jarno	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2399	f5a64251	Aurelien Jarno	\| be compared, and 0 otherwise. The invalid exception is raised if either
2400	f5a64251	Aurelien Jarno	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
2401	f5a64251	Aurelien Jarno	\| Standard for Binary Floating-Point Arithmetic.
2402	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
2403	67b7861d	Aurelien Jarno
2404	67b7861d	Aurelien Jarno	int float32_unordered( float32 a, float32 b STATUS_PARAM )
2405	67b7861d	Aurelien Jarno	{
2406	67b7861d	Aurelien Jarno	a = float32_squash_input_denormal(a STATUS_VAR);
2407	67b7861d	Aurelien Jarno	b = float32_squash_input_denormal(b STATUS_VAR);
2408	67b7861d	Aurelien Jarno
2409	67b7861d	Aurelien Jarno	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2410	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2411	67b7861d	Aurelien Jarno	) {
2412	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
2413	67b7861d	Aurelien Jarno	return 1;
2414	67b7861d	Aurelien Jarno	}
2415	67b7861d	Aurelien Jarno	return 0;
2416	67b7861d	Aurelien Jarno	}
2417	b689362d	Aurelien Jarno
2418	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
2419	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is equal to
2420	f5a64251	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2421	f5a64251	Aurelien Jarno	\| exception. The comparison is performed according to the IEC/IEEE Standard
2422	f5a64251	Aurelien Jarno	\| for Binary Floating-Point Arithmetic.
2423	158142c2	bellard	----------------------------------------------------------------------------/
2424	158142c2	bellard
2425	b689362d	Aurelien Jarno	int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
2426	158142c2	bellard	{
2427	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2428	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2429	158142c2	bellard
2430	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2431	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2432	158142c2	bellard	) {
2433	b689362d	Aurelien Jarno	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
2434	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
2435	b689362d	Aurelien Jarno	}
2436	158142c2	bellard	return 0;
2437	158142c2	bellard	}
2438	b689362d	Aurelien Jarno	return ( float32_val(a) == float32_val(b) ) \|\|
2439	b689362d	Aurelien Jarno	( (uint32_t) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
2440	158142c2	bellard	}
2441	158142c2	bellard
2442	158142c2	bellard	/*----------------------------------------------------------------------------
2443	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is less than or
2444	158142c2	bellard	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2445	158142c2	bellard	\| cause an exception. Otherwise, the comparison is performed according to the
2446	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2447	158142c2	bellard	----------------------------------------------------------------------------/
2448	158142c2	bellard
2449	750afe93	bellard	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2450	158142c2	bellard	{
2451	158142c2	bellard	flag aSign, bSign;
2452	bb98fe42	Andreas Färber	uint32_t av, bv;
2453	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2454	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2455	158142c2	bellard
2456	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2457	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2458	158142c2	bellard	) {
2459	158142c2	bellard	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
2460	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2461	158142c2	bellard	}
2462	158142c2	bellard	return 0;
2463	158142c2	bellard	}
2464	158142c2	bellard	aSign = extractFloat32Sign( a );
2465	158142c2	bellard	bSign = extractFloat32Sign( b );
2466	f090c9d4	pbrook	av = float32_val(a);
2467	f090c9d4	pbrook	bv = float32_val(b);
2468	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
2469	f090c9d4	pbrook	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
2470	158142c2	bellard
2471	158142c2	bellard	}
2472	158142c2	bellard
2473	158142c2	bellard	/*----------------------------------------------------------------------------
2474	158142c2	bellard	\| Returns 1 if the single-precision floating-point value `a' is less than
2475	158142c2	bellard	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2476	158142c2	bellard	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
2477	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
2478	158142c2	bellard	----------------------------------------------------------------------------/
2479	158142c2	bellard
2480	750afe93	bellard	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2481	158142c2	bellard	{
2482	158142c2	bellard	flag aSign, bSign;
2483	bb98fe42	Andreas Färber	uint32_t av, bv;
2484	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2485	37d18660	Peter Maydell	b = float32_squash_input_denormal(b STATUS_VAR);
2486	158142c2	bellard
2487	158142c2	bellard	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2488	158142c2	bellard	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2489	158142c2	bellard	) {
2490	158142c2	bellard	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
2491	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2492	158142c2	bellard	}
2493	158142c2	bellard	return 0;
2494	158142c2	bellard	}
2495	158142c2	bellard	aSign = extractFloat32Sign( a );
2496	158142c2	bellard	bSign = extractFloat32Sign( b );
2497	f090c9d4	pbrook	av = float32_val(a);
2498	f090c9d4	pbrook	bv = float32_val(b);
2499	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
2500	f090c9d4	pbrook	return ( av != bv ) && ( aSign ^ ( av < bv ) );
2501	158142c2	bellard
2502	158142c2	bellard	}
2503	158142c2	bellard
2504	158142c2	bellard	/*----------------------------------------------------------------------------
2505	67b7861d	Aurelien Jarno	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2506	67b7861d	Aurelien Jarno	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
2507	67b7861d	Aurelien Jarno	\| comparison is performed according to the IEC/IEEE Standard for Binary
2508	67b7861d	Aurelien Jarno	\| Floating-Point Arithmetic.
2509	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
2510	67b7861d	Aurelien Jarno
2511	67b7861d	Aurelien Jarno	int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2512	67b7861d	Aurelien Jarno	{
2513	67b7861d	Aurelien Jarno	a = float32_squash_input_denormal(a STATUS_VAR);
2514	67b7861d	Aurelien Jarno	b = float32_squash_input_denormal(b STATUS_VAR);
2515	67b7861d	Aurelien Jarno
2516	67b7861d	Aurelien Jarno	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2517	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2518	67b7861d	Aurelien Jarno	) {
2519	67b7861d	Aurelien Jarno	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
2520	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
2521	67b7861d	Aurelien Jarno	}
2522	67b7861d	Aurelien Jarno	return 1;
2523	67b7861d	Aurelien Jarno	}
2524	67b7861d	Aurelien Jarno	return 0;
2525	67b7861d	Aurelien Jarno	}
2526	67b7861d	Aurelien Jarno
2527	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
2528	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2529	158142c2	bellard	\| `a' to the 32-bit two's complement integer format. The conversion is
2530	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2531	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
2532	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
2533	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
2534	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
2535	158142c2	bellard	----------------------------------------------------------------------------/
2536	158142c2	bellard
2537	158142c2	bellard	int32 float64_to_int32( float64 a STATUS_PARAM )
2538	158142c2	bellard	{
2539	158142c2	bellard	flag aSign;
2540	158142c2	bellard	int16 aExp, shiftCount;
2541	bb98fe42	Andreas Färber	uint64_t aSig;
2542	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2543	158142c2	bellard
2544	158142c2	bellard	aSig = extractFloat64Frac( a );
2545	158142c2	bellard	aExp = extractFloat64Exp( a );
2546	158142c2	bellard	aSign = extractFloat64Sign( a );
2547	158142c2	bellard	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2548	158142c2	bellard	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
2549	158142c2	bellard	shiftCount = 0x42C - aExp;
2550	158142c2	bellard	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2551	158142c2	bellard	return roundAndPackInt32( aSign, aSig STATUS_VAR );
2552	158142c2	bellard
2553	158142c2	bellard	}
2554	158142c2	bellard
2555	158142c2	bellard	/*----------------------------------------------------------------------------
2556	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2557	158142c2	bellard	\| `a' to the 32-bit two's complement integer format. The conversion is
2558	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2559	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero.
2560	158142c2	bellard	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2561	158142c2	bellard	\| the conversion overflows, the largest integer with the same sign as `a' is
2562	158142c2	bellard	\| returned.
2563	158142c2	bellard	----------------------------------------------------------------------------/
2564	158142c2	bellard
2565	158142c2	bellard	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2566	158142c2	bellard	{
2567	158142c2	bellard	flag aSign;
2568	158142c2	bellard	int16 aExp, shiftCount;
2569	bb98fe42	Andreas Färber	uint64_t aSig, savedASig;
2570	158142c2	bellard	int32 z;
2571	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2572	158142c2	bellard
2573	158142c2	bellard	aSig = extractFloat64Frac( a );
2574	158142c2	bellard	aExp = extractFloat64Exp( a );
2575	158142c2	bellard	aSign = extractFloat64Sign( a );
2576	158142c2	bellard	if ( 0x41E < aExp ) {
2577	158142c2	bellard	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2578	158142c2	bellard	goto invalid;
2579	158142c2	bellard	}
2580	158142c2	bellard	else if ( aExp < 0x3FF ) {
2581	158142c2	bellard	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
2582	158142c2	bellard	return 0;
2583	158142c2	bellard	}
2584	158142c2	bellard	aSig \|= LIT64( 0x0010000000000000 );
2585	158142c2	bellard	shiftCount = 0x433 - aExp;
2586	158142c2	bellard	savedASig = aSig;
2587	158142c2	bellard	aSig >>= shiftCount;
2588	158142c2	bellard	z = aSig;
2589	158142c2	bellard	if ( aSign ) z = - z;
2590	158142c2	bellard	if ( ( z < 0 ) ^ aSign ) {
2591	158142c2	bellard	invalid:
2592	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2593	bb98fe42	Andreas Färber	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2594	158142c2	bellard	}
2595	158142c2	bellard	if ( ( aSig<<shiftCount ) != savedASig ) {
2596	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
2597	158142c2	bellard	}
2598	158142c2	bellard	return z;
2599	158142c2	bellard
2600	158142c2	bellard	}
2601	158142c2	bellard
2602	158142c2	bellard	/*----------------------------------------------------------------------------
2603	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2604	cbcef455	Peter Maydell	\| `a' to the 16-bit two's complement integer format. The conversion is
2605	cbcef455	Peter Maydell	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2606	cbcef455	Peter Maydell	\| Arithmetic, except that the conversion is always rounded toward zero.
2607	cbcef455	Peter Maydell	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2608	cbcef455	Peter Maydell	\| the conversion overflows, the largest integer with the same sign as `a' is
2609	cbcef455	Peter Maydell	\| returned.
2610	cbcef455	Peter Maydell	----------------------------------------------------------------------------/
2611	cbcef455	Peter Maydell
2612	cbcef455	Peter Maydell	int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
2613	cbcef455	Peter Maydell	{
2614	cbcef455	Peter Maydell	flag aSign;
2615	cbcef455	Peter Maydell	int16 aExp, shiftCount;
2616	bb98fe42	Andreas Färber	uint64_t aSig, savedASig;
2617	cbcef455	Peter Maydell	int32 z;
2618	cbcef455	Peter Maydell
2619	cbcef455	Peter Maydell	aSig = extractFloat64Frac( a );
2620	cbcef455	Peter Maydell	aExp = extractFloat64Exp( a );
2621	cbcef455	Peter Maydell	aSign = extractFloat64Sign( a );
2622	cbcef455	Peter Maydell	if ( 0x40E < aExp ) {
2623	cbcef455	Peter Maydell	if ( ( aExp == 0x7FF ) && aSig ) {
2624	cbcef455	Peter Maydell	aSign = 0;
2625	cbcef455	Peter Maydell	}
2626	cbcef455	Peter Maydell	goto invalid;
2627	cbcef455	Peter Maydell	}
2628	cbcef455	Peter Maydell	else if ( aExp < 0x3FF ) {
2629	cbcef455	Peter Maydell	if ( aExp \|\| aSig ) {
2630	cbcef455	Peter Maydell	STATUS(float_exception_flags) \|= float_flag_inexact;
2631	cbcef455	Peter Maydell	}
2632	cbcef455	Peter Maydell	return 0;
2633	cbcef455	Peter Maydell	}
2634	cbcef455	Peter Maydell	aSig \|= LIT64( 0x0010000000000000 );
2635	cbcef455	Peter Maydell	shiftCount = 0x433 - aExp;
2636	cbcef455	Peter Maydell	savedASig = aSig;
2637	cbcef455	Peter Maydell	aSig >>= shiftCount;
2638	cbcef455	Peter Maydell	z = aSig;
2639	cbcef455	Peter Maydell	if ( aSign ) {
2640	cbcef455	Peter Maydell	z = - z;
2641	cbcef455	Peter Maydell	}
2642	cbcef455	Peter Maydell	if ( ( (int16_t)z < 0 ) ^ aSign ) {
2643	cbcef455	Peter Maydell	invalid:
2644	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
2645	bb98fe42	Andreas Färber	return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
2646	cbcef455	Peter Maydell	}
2647	cbcef455	Peter Maydell	if ( ( aSig<<shiftCount ) != savedASig ) {
2648	cbcef455	Peter Maydell	STATUS(float_exception_flags) \|= float_flag_inexact;
2649	cbcef455	Peter Maydell	}
2650	cbcef455	Peter Maydell	return z;
2651	cbcef455	Peter Maydell	}
2652	cbcef455	Peter Maydell
2653	cbcef455	Peter Maydell	/*----------------------------------------------------------------------------
2654	cbcef455	Peter Maydell	\| Returns the result of converting the double-precision floating-point value
2655	158142c2	bellard	\| `a' to the 64-bit two's complement integer format. The conversion is
2656	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2657	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
2658	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
2659	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
2660	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
2661	158142c2	bellard	----------------------------------------------------------------------------/
2662	158142c2	bellard
2663	158142c2	bellard	int64 float64_to_int64( float64 a STATUS_PARAM )
2664	158142c2	bellard	{
2665	158142c2	bellard	flag aSign;
2666	158142c2	bellard	int16 aExp, shiftCount;
2667	bb98fe42	Andreas Färber	uint64_t aSig, aSigExtra;
2668	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2669	158142c2	bellard
2670	158142c2	bellard	aSig = extractFloat64Frac( a );
2671	158142c2	bellard	aExp = extractFloat64Exp( a );
2672	158142c2	bellard	aSign = extractFloat64Sign( a );
2673	158142c2	bellard	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
2674	158142c2	bellard	shiftCount = 0x433 - aExp;
2675	158142c2	bellard	if ( shiftCount <= 0 ) {
2676	158142c2	bellard	if ( 0x43E < aExp ) {
2677	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2678	158142c2	bellard	if ( ! aSign
2679	158142c2	bellard	\|\| ( ( aExp == 0x7FF )
2680	158142c2	bellard	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
2681	158142c2	bellard	) {
2682	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
2683	158142c2	bellard	}
2684	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
2685	158142c2	bellard	}
2686	158142c2	bellard	aSigExtra = 0;
2687	158142c2	bellard	aSig <<= - shiftCount;
2688	158142c2	bellard	}
2689	158142c2	bellard	else {
2690	158142c2	bellard	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2691	158142c2	bellard	}
2692	158142c2	bellard	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2693	158142c2	bellard
2694	158142c2	bellard	}
2695	158142c2	bellard
2696	158142c2	bellard	/*----------------------------------------------------------------------------
2697	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2698	158142c2	bellard	\| `a' to the 64-bit two's complement integer format. The conversion is
2699	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2700	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero.
2701	158142c2	bellard	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2702	158142c2	bellard	\| the conversion overflows, the largest integer with the same sign as `a' is
2703	158142c2	bellard	\| returned.
2704	158142c2	bellard	----------------------------------------------------------------------------/
2705	158142c2	bellard
2706	158142c2	bellard	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2707	158142c2	bellard	{
2708	158142c2	bellard	flag aSign;
2709	158142c2	bellard	int16 aExp, shiftCount;
2710	bb98fe42	Andreas Färber	uint64_t aSig;
2711	158142c2	bellard	int64 z;
2712	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2713	158142c2	bellard
2714	158142c2	bellard	aSig = extractFloat64Frac( a );
2715	158142c2	bellard	aExp = extractFloat64Exp( a );
2716	158142c2	bellard	aSign = extractFloat64Sign( a );
2717	158142c2	bellard	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
2718	158142c2	bellard	shiftCount = aExp - 0x433;
2719	158142c2	bellard	if ( 0 <= shiftCount ) {
2720	158142c2	bellard	if ( 0x43E <= aExp ) {
2721	f090c9d4	pbrook	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2722	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
2723	158142c2	bellard	if ( ! aSign
2724	158142c2	bellard	\|\| ( ( aExp == 0x7FF )
2725	158142c2	bellard	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
2726	158142c2	bellard	) {
2727	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
2728	158142c2	bellard	}
2729	158142c2	bellard	}
2730	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
2731	158142c2	bellard	}
2732	158142c2	bellard	z = aSig<<shiftCount;
2733	158142c2	bellard	}
2734	158142c2	bellard	else {
2735	158142c2	bellard	if ( aExp < 0x3FE ) {
2736	158142c2	bellard	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
2737	158142c2	bellard	return 0;
2738	158142c2	bellard	}
2739	158142c2	bellard	z = aSig>>( - shiftCount );
2740	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
2741	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
2742	158142c2	bellard	}
2743	158142c2	bellard	}
2744	158142c2	bellard	if ( aSign ) z = - z;
2745	158142c2	bellard	return z;
2746	158142c2	bellard
2747	158142c2	bellard	}
2748	158142c2	bellard
2749	158142c2	bellard	/*----------------------------------------------------------------------------
2750	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2751	158142c2	bellard	\| `a' to the single-precision floating-point format. The conversion is
2752	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2753	158142c2	bellard	\| Arithmetic.
2754	158142c2	bellard	----------------------------------------------------------------------------/
2755	158142c2	bellard
2756	158142c2	bellard	float32 float64_to_float32( float64 a STATUS_PARAM )
2757	158142c2	bellard	{
2758	158142c2	bellard	flag aSign;
2759	158142c2	bellard	int16 aExp;
2760	bb98fe42	Andreas Färber	uint64_t aSig;
2761	bb98fe42	Andreas Färber	uint32_t zSig;
2762	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2763	158142c2	bellard
2764	158142c2	bellard	aSig = extractFloat64Frac( a );
2765	158142c2	bellard	aExp = extractFloat64Exp( a );
2766	158142c2	bellard	aSign = extractFloat64Sign( a );
2767	158142c2	bellard	if ( aExp == 0x7FF ) {
2768	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2769	158142c2	bellard	return packFloat32( aSign, 0xFF, 0 );
2770	158142c2	bellard	}
2771	158142c2	bellard	shift64RightJamming( aSig, 22, &aSig );
2772	158142c2	bellard	zSig = aSig;
2773	158142c2	bellard	if ( aExp \|\| zSig ) {
2774	158142c2	bellard	zSig \|= 0x40000000;
2775	158142c2	bellard	aExp -= 0x381;
2776	158142c2	bellard	}
2777	158142c2	bellard	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2778	158142c2	bellard
2779	158142c2	bellard	}
2780	158142c2	bellard
2781	60011498	Paul Brook
2782	60011498	Paul Brook	/*----------------------------------------------------------------------------
2783	60011498	Paul Brook	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2784	60011498	Paul Brook	\| half-precision floating-point value, returning the result. After being
2785	60011498	Paul Brook	\| shifted into the proper positions, the three fields are simply added
2786	60011498	Paul Brook	\| together to form the result. This means that any integer portion of `zSig'
2787	60011498	Paul Brook	\| will be added into the exponent. Since a properly normalized significand
2788	60011498	Paul Brook	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
2789	60011498	Paul Brook	\| than the desired result exponent whenever `zSig' is a complete, normalized
2790	60011498	Paul Brook	\| significand.
2791	60011498	Paul Brook	----------------------------------------------------------------------------/
2792	bb98fe42	Andreas Färber	static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
2793	60011498	Paul Brook	{
2794	bb4d4bb3	Peter Maydell	return make_float16(
2795	bb98fe42	Andreas Färber	(((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
2796	60011498	Paul Brook	}
2797	60011498	Paul Brook
2798	60011498	Paul Brook	/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2799	60011498	Paul Brook	The latter gains extra exponent range by omitting the NaN/Inf encodings. */
2800	bb4d4bb3	Peter Maydell
2801	bb4d4bb3	Peter Maydell	float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
2802	60011498	Paul Brook	{
2803	60011498	Paul Brook	flag aSign;
2804	60011498	Paul Brook	int16 aExp;
2805	bb98fe42	Andreas Färber	uint32_t aSig;
2806	60011498	Paul Brook
2807	bb4d4bb3	Peter Maydell	aSign = extractFloat16Sign(a);
2808	bb4d4bb3	Peter Maydell	aExp = extractFloat16Exp(a);
2809	bb4d4bb3	Peter Maydell	aSig = extractFloat16Frac(a);
2810	60011498	Paul Brook
2811	60011498	Paul Brook	if (aExp == 0x1f && ieee) {
2812	60011498	Paul Brook	if (aSig) {
2813	f591e1be	Peter Maydell	return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
2814	60011498	Paul Brook	}
2815	60011498	Paul Brook	return packFloat32(aSign, 0xff, aSig << 13);
2816	60011498	Paul Brook	}
2817	60011498	Paul Brook	if (aExp == 0) {
2818	60011498	Paul Brook	int8 shiftCount;
2819	60011498	Paul Brook
2820	60011498	Paul Brook	if (aSig == 0) {
2821	60011498	Paul Brook	return packFloat32(aSign, 0, 0);
2822	60011498	Paul Brook	}
2823	60011498	Paul Brook
2824	60011498	Paul Brook	shiftCount = countLeadingZeros32( aSig ) - 21;
2825	60011498	Paul Brook	aSig = aSig << shiftCount;
2826	60011498	Paul Brook	aExp = -shiftCount;
2827	60011498	Paul Brook	}
2828	60011498	Paul Brook	return packFloat32( aSign, aExp + 0x70, aSig << 13);
2829	60011498	Paul Brook	}
2830	60011498	Paul Brook
2831	bb4d4bb3	Peter Maydell	float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
2832	60011498	Paul Brook	{
2833	60011498	Paul Brook	flag aSign;
2834	60011498	Paul Brook	int16 aExp;
2835	bb98fe42	Andreas Färber	uint32_t aSig;
2836	bb98fe42	Andreas Färber	uint32_t mask;
2837	bb98fe42	Andreas Färber	uint32_t increment;
2838	60011498	Paul Brook	int8 roundingMode;
2839	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
2840	60011498	Paul Brook
2841	60011498	Paul Brook	aSig = extractFloat32Frac( a );
2842	60011498	Paul Brook	aExp = extractFloat32Exp( a );
2843	60011498	Paul Brook	aSign = extractFloat32Sign( a );
2844	60011498	Paul Brook	if ( aExp == 0xFF ) {
2845	60011498	Paul Brook	if (aSig) {
2846	600e30d2	Peter Maydell	/* Input is a NaN */
2847	600e30d2	Peter Maydell	float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2848	600e30d2	Peter Maydell	if (!ieee) {
2849	600e30d2	Peter Maydell	return packFloat16(aSign, 0, 0);
2850	600e30d2	Peter Maydell	}
2851	600e30d2	Peter Maydell	return r;
2852	60011498	Paul Brook	}
2853	600e30d2	Peter Maydell	/* Infinity */
2854	600e30d2	Peter Maydell	if (!ieee) {
2855	600e30d2	Peter Maydell	float_raise(float_flag_invalid STATUS_VAR);
2856	600e30d2	Peter Maydell	return packFloat16(aSign, 0x1f, 0x3ff);
2857	600e30d2	Peter Maydell	}
2858	600e30d2	Peter Maydell	return packFloat16(aSign, 0x1f, 0);
2859	60011498	Paul Brook	}
2860	600e30d2	Peter Maydell	if (aExp == 0 && aSig == 0) {
2861	60011498	Paul Brook	return packFloat16(aSign, 0, 0);
2862	60011498	Paul Brook	}
2863	60011498	Paul Brook	/* Decimal point between bits 22 and 23. */
2864	60011498	Paul Brook	aSig \|= 0x00800000;
2865	60011498	Paul Brook	aExp -= 0x7f;
2866	60011498	Paul Brook	if (aExp < -14) {
2867	600e30d2	Peter Maydell	mask = 0x00ffffff;
2868	600e30d2	Peter Maydell	if (aExp >= -24) {
2869	600e30d2	Peter Maydell	mask >>= 25 + aExp;
2870	60011498	Paul Brook	}
2871	60011498	Paul Brook	} else {
2872	60011498	Paul Brook	mask = 0x00001fff;
2873	60011498	Paul Brook	}
2874	60011498	Paul Brook	if (aSig & mask) {
2875	60011498	Paul Brook	float_raise( float_flag_underflow STATUS_VAR );
2876	60011498	Paul Brook	roundingMode = STATUS(float_rounding_mode);
2877	60011498	Paul Brook	switch (roundingMode) {
2878	60011498	Paul Brook	case float_round_nearest_even:
2879	60011498	Paul Brook	increment = (mask + 1) >> 1;
2880	60011498	Paul Brook	if ((aSig & mask) == increment) {
2881	60011498	Paul Brook	increment = aSig & (increment << 1);
2882	60011498	Paul Brook	}
2883	60011498	Paul Brook	break;
2884	60011498	Paul Brook	case float_round_up:
2885	60011498	Paul Brook	increment = aSign ? 0 : mask;
2886	60011498	Paul Brook	break;
2887	60011498	Paul Brook	case float_round_down:
2888	60011498	Paul Brook	increment = aSign ? mask : 0;
2889	60011498	Paul Brook	break;
2890	60011498	Paul Brook	default: /* round_to_zero */
2891	60011498	Paul Brook	increment = 0;
2892	60011498	Paul Brook	break;
2893	60011498	Paul Brook	}
2894	60011498	Paul Brook	aSig += increment;
2895	60011498	Paul Brook	if (aSig >= 0x01000000) {
2896	60011498	Paul Brook	aSig >>= 1;
2897	60011498	Paul Brook	aExp++;
2898	60011498	Paul Brook	}
2899	60011498	Paul Brook	} else if (aExp < -14
2900	60011498	Paul Brook	&& STATUS(float_detect_tininess) == float_tininess_before_rounding) {
2901	60011498	Paul Brook	float_raise( float_flag_underflow STATUS_VAR);
2902	60011498	Paul Brook	}
2903	60011498	Paul Brook
2904	60011498	Paul Brook	if (ieee) {
2905	60011498	Paul Brook	if (aExp > 15) {
2906	60011498	Paul Brook	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
2907	60011498	Paul Brook	return packFloat16(aSign, 0x1f, 0);
2908	60011498	Paul Brook	}
2909	60011498	Paul Brook	} else {
2910	60011498	Paul Brook	if (aExp > 16) {
2911	600e30d2	Peter Maydell	float_raise(float_flag_invalid \| float_flag_inexact STATUS_VAR);
2912	60011498	Paul Brook	return packFloat16(aSign, 0x1f, 0x3ff);
2913	60011498	Paul Brook	}
2914	60011498	Paul Brook	}
2915	60011498	Paul Brook	if (aExp < -24) {
2916	60011498	Paul Brook	return packFloat16(aSign, 0, 0);
2917	60011498	Paul Brook	}
2918	60011498	Paul Brook	if (aExp < -14) {
2919	60011498	Paul Brook	aSig >>= -14 - aExp;
2920	60011498	Paul Brook	aExp = -14;
2921	60011498	Paul Brook	}
2922	60011498	Paul Brook	return packFloat16(aSign, aExp + 14, aSig >> 13);
2923	60011498	Paul Brook	}
2924	60011498	Paul Brook
2925	158142c2	bellard	#ifdef FLOATX80
2926	158142c2	bellard
2927	158142c2	bellard	/*----------------------------------------------------------------------------
2928	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2929	158142c2	bellard	\| `a' to the extended double-precision floating-point format. The conversion
2930	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2931	158142c2	bellard	\| Arithmetic.
2932	158142c2	bellard	----------------------------------------------------------------------------/
2933	158142c2	bellard
2934	158142c2	bellard	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2935	158142c2	bellard	{
2936	158142c2	bellard	flag aSign;
2937	158142c2	bellard	int16 aExp;
2938	bb98fe42	Andreas Färber	uint64_t aSig;
2939	158142c2	bellard
2940	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2941	158142c2	bellard	aSig = extractFloat64Frac( a );
2942	158142c2	bellard	aExp = extractFloat64Exp( a );
2943	158142c2	bellard	aSign = extractFloat64Sign( a );
2944	158142c2	bellard	if ( aExp == 0x7FF ) {
2945	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2946	158142c2	bellard	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2947	158142c2	bellard	}
2948	158142c2	bellard	if ( aExp == 0 ) {
2949	158142c2	bellard	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2950	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2951	158142c2	bellard	}
2952	158142c2	bellard	return
2953	158142c2	bellard	packFloatx80(
2954	158142c2	bellard	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
2955	158142c2	bellard
2956	158142c2	bellard	}
2957	158142c2	bellard
2958	158142c2	bellard	#endif
2959	158142c2	bellard
2960	158142c2	bellard	#ifdef FLOAT128
2961	158142c2	bellard
2962	158142c2	bellard	/*----------------------------------------------------------------------------
2963	158142c2	bellard	\| Returns the result of converting the double-precision floating-point value
2964	158142c2	bellard	\| `a' to the quadruple-precision floating-point format. The conversion is
2965	158142c2	bellard	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
2966	158142c2	bellard	\| Arithmetic.
2967	158142c2	bellard	----------------------------------------------------------------------------/
2968	158142c2	bellard
2969	158142c2	bellard	float128 float64_to_float128( float64 a STATUS_PARAM )
2970	158142c2	bellard	{
2971	158142c2	bellard	flag aSign;
2972	158142c2	bellard	int16 aExp;
2973	bb98fe42	Andreas Färber	uint64_t aSig, zSig0, zSig1;
2974	158142c2	bellard
2975	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
2976	158142c2	bellard	aSig = extractFloat64Frac( a );
2977	158142c2	bellard	aExp = extractFloat64Exp( a );
2978	158142c2	bellard	aSign = extractFloat64Sign( a );
2979	158142c2	bellard	if ( aExp == 0x7FF ) {
2980	bcd4d9af	Christophe Lyon	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
2981	158142c2	bellard	return packFloat128( aSign, 0x7FFF, 0, 0 );
2982	158142c2	bellard	}
2983	158142c2	bellard	if ( aExp == 0 ) {
2984	158142c2	bellard	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2985	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2986	158142c2	bellard	--aExp;
2987	158142c2	bellard	}
2988	158142c2	bellard	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2989	158142c2	bellard	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2990	158142c2	bellard
2991	158142c2	bellard	}
2992	158142c2	bellard
2993	158142c2	bellard	#endif
2994	158142c2	bellard
2995	158142c2	bellard	/*----------------------------------------------------------------------------
2996	158142c2	bellard	\| Rounds the double-precision floating-point value `a' to an integer, and
2997	158142c2	bellard	\| returns the result as a double-precision floating-point value. The
2998	158142c2	bellard	\| operation is performed according to the IEC/IEEE Standard for Binary
2999	158142c2	bellard	\| Floating-Point Arithmetic.
3000	158142c2	bellard	----------------------------------------------------------------------------/
3001	158142c2	bellard
3002	158142c2	bellard	float64 float64_round_to_int( float64 a STATUS_PARAM )
3003	158142c2	bellard	{
3004	158142c2	bellard	flag aSign;
3005	158142c2	bellard	int16 aExp;
3006	bb98fe42	Andreas Färber	uint64_t lastBitMask, roundBitsMask;
3007	158142c2	bellard	int8 roundingMode;
3008	bb98fe42	Andreas Färber	uint64_t z;
3009	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3010	158142c2	bellard
3011	158142c2	bellard	aExp = extractFloat64Exp( a );
3012	158142c2	bellard	if ( 0x433 <= aExp ) {
3013	158142c2	bellard	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3014	158142c2	bellard	return propagateFloat64NaN( a, a STATUS_VAR );
3015	158142c2	bellard	}
3016	158142c2	bellard	return a;
3017	158142c2	bellard	}
3018	158142c2	bellard	if ( aExp < 0x3FF ) {
3019	bb98fe42	Andreas Färber	if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3020	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
3021	158142c2	bellard	aSign = extractFloat64Sign( a );
3022	158142c2	bellard	switch ( STATUS(float_rounding_mode) ) {
3023	158142c2	bellard	case float_round_nearest_even:
3024	158142c2	bellard	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3025	158142c2	bellard	return packFloat64( aSign, 0x3FF, 0 );
3026	158142c2	bellard	}
3027	158142c2	bellard	break;
3028	158142c2	bellard	case float_round_down:
3029	f090c9d4	pbrook	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3030	158142c2	bellard	case float_round_up:
3031	f090c9d4	pbrook	return make_float64(
3032	f090c9d4	pbrook	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3033	158142c2	bellard	}
3034	158142c2	bellard	return packFloat64( aSign, 0, 0 );
3035	158142c2	bellard	}
3036	158142c2	bellard	lastBitMask = 1;
3037	158142c2	bellard	lastBitMask <<= 0x433 - aExp;
3038	158142c2	bellard	roundBitsMask = lastBitMask - 1;
3039	f090c9d4	pbrook	z = float64_val(a);
3040	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
3041	158142c2	bellard	if ( roundingMode == float_round_nearest_even ) {
3042	158142c2	bellard	z += lastBitMask>>1;
3043	158142c2	bellard	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3044	158142c2	bellard	}
3045	158142c2	bellard	else if ( roundingMode != float_round_to_zero ) {
3046	f090c9d4	pbrook	if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
3047	158142c2	bellard	z += roundBitsMask;
3048	158142c2	bellard	}
3049	158142c2	bellard	}
3050	158142c2	bellard	z &= ~ roundBitsMask;
3051	f090c9d4	pbrook	if ( z != float64_val(a) )
3052	f090c9d4	pbrook	STATUS(float_exception_flags) \|= float_flag_inexact;
3053	f090c9d4	pbrook	return make_float64(z);
3054	158142c2	bellard
3055	158142c2	bellard	}
3056	158142c2	bellard
3057	e6e5906b	pbrook	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
3058	e6e5906b	pbrook	{
3059	e6e5906b	pbrook	int oldmode;
3060	e6e5906b	pbrook	float64 res;
3061	e6e5906b	pbrook	oldmode = STATUS(float_rounding_mode);
3062	e6e5906b	pbrook	STATUS(float_rounding_mode) = float_round_to_zero;
3063	e6e5906b	pbrook	res = float64_round_to_int(a STATUS_VAR);
3064	e6e5906b	pbrook	STATUS(float_rounding_mode) = oldmode;
3065	e6e5906b	pbrook	return res;
3066	e6e5906b	pbrook	}
3067	e6e5906b	pbrook
3068	158142c2	bellard	/*----------------------------------------------------------------------------
3069	158142c2	bellard	\| Returns the result of adding the absolute values of the double-precision
3070	158142c2	bellard	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
3071	158142c2	bellard	\| before being returned. `zSign' is ignored if the result is a NaN.
3072	158142c2	bellard	\| The addition is performed according to the IEC/IEEE Standard for Binary
3073	158142c2	bellard	\| Floating-Point Arithmetic.
3074	158142c2	bellard	----------------------------------------------------------------------------/
3075	158142c2	bellard
3076	158142c2	bellard	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3077	158142c2	bellard	{
3078	158142c2	bellard	int16 aExp, bExp, zExp;
3079	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig;
3080	158142c2	bellard	int16 expDiff;
3081	158142c2	bellard
3082	158142c2	bellard	aSig = extractFloat64Frac( a );
3083	158142c2	bellard	aExp = extractFloat64Exp( a );
3084	158142c2	bellard	bSig = extractFloat64Frac( b );
3085	158142c2	bellard	bExp = extractFloat64Exp( b );
3086	158142c2	bellard	expDiff = aExp - bExp;
3087	158142c2	bellard	aSig <<= 9;
3088	158142c2	bellard	bSig <<= 9;
3089	158142c2	bellard	if ( 0 < expDiff ) {
3090	158142c2	bellard	if ( aExp == 0x7FF ) {
3091	158142c2	bellard	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3092	158142c2	bellard	return a;
3093	158142c2	bellard	}
3094	158142c2	bellard	if ( bExp == 0 ) {
3095	158142c2	bellard	--expDiff;
3096	158142c2	bellard	}
3097	158142c2	bellard	else {
3098	158142c2	bellard	bSig \|= LIT64( 0x2000000000000000 );
3099	158142c2	bellard	}
3100	158142c2	bellard	shift64RightJamming( bSig, expDiff, &bSig );
3101	158142c2	bellard	zExp = aExp;
3102	158142c2	bellard	}
3103	158142c2	bellard	else if ( expDiff < 0 ) {
3104	158142c2	bellard	if ( bExp == 0x7FF ) {
3105	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3106	158142c2	bellard	return packFloat64( zSign, 0x7FF, 0 );
3107	158142c2	bellard	}
3108	158142c2	bellard	if ( aExp == 0 ) {
3109	158142c2	bellard	++expDiff;
3110	158142c2	bellard	}
3111	158142c2	bellard	else {
3112	158142c2	bellard	aSig \|= LIT64( 0x2000000000000000 );
3113	158142c2	bellard	}
3114	158142c2	bellard	shift64RightJamming( aSig, - expDiff, &aSig );
3115	158142c2	bellard	zExp = bExp;
3116	158142c2	bellard	}
3117	158142c2	bellard	else {
3118	158142c2	bellard	if ( aExp == 0x7FF ) {
3119	158142c2	bellard	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3120	158142c2	bellard	return a;
3121	158142c2	bellard	}
3122	fe76d976	pbrook	if ( aExp == 0 ) {
3123	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
3124	fe76d976	pbrook	return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3125	fe76d976	pbrook	}
3126	158142c2	bellard	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3127	158142c2	bellard	zExp = aExp;
3128	158142c2	bellard	goto roundAndPack;
3129	158142c2	bellard	}
3130	158142c2	bellard	aSig \|= LIT64( 0x2000000000000000 );
3131	158142c2	bellard	zSig = ( aSig + bSig )<<1;
3132	158142c2	bellard	--zExp;
3133	bb98fe42	Andreas Färber	if ( (int64_t) zSig < 0 ) {
3134	158142c2	bellard	zSig = aSig + bSig;
3135	158142c2	bellard	++zExp;
3136	158142c2	bellard	}
3137	158142c2	bellard	roundAndPack:
3138	158142c2	bellard	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3139	158142c2	bellard
3140	158142c2	bellard	}
3141	158142c2	bellard
3142	158142c2	bellard	/*----------------------------------------------------------------------------
3143	158142c2	bellard	\| Returns the result of subtracting the absolute values of the double-
3144	158142c2	bellard	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
3145	158142c2	bellard	\| difference is negated before being returned. `zSign' is ignored if the
3146	158142c2	bellard	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
3147	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
3148	158142c2	bellard	----------------------------------------------------------------------------/
3149	158142c2	bellard
3150	158142c2	bellard	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3151	158142c2	bellard	{
3152	158142c2	bellard	int16 aExp, bExp, zExp;
3153	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig;
3154	158142c2	bellard	int16 expDiff;
3155	158142c2	bellard
3156	158142c2	bellard	aSig = extractFloat64Frac( a );
3157	158142c2	bellard	aExp = extractFloat64Exp( a );
3158	158142c2	bellard	bSig = extractFloat64Frac( b );
3159	158142c2	bellard	bExp = extractFloat64Exp( b );
3160	158142c2	bellard	expDiff = aExp - bExp;
3161	158142c2	bellard	aSig <<= 10;
3162	158142c2	bellard	bSig <<= 10;
3163	158142c2	bellard	if ( 0 < expDiff ) goto aExpBigger;
3164	158142c2	bellard	if ( expDiff < 0 ) goto bExpBigger;
3165	158142c2	bellard	if ( aExp == 0x7FF ) {
3166	158142c2	bellard	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3167	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3168	158142c2	bellard	return float64_default_nan;
3169	158142c2	bellard	}
3170	158142c2	bellard	if ( aExp == 0 ) {
3171	158142c2	bellard	aExp = 1;
3172	158142c2	bellard	bExp = 1;
3173	158142c2	bellard	}
3174	158142c2	bellard	if ( bSig < aSig ) goto aBigger;
3175	158142c2	bellard	if ( aSig < bSig ) goto bBigger;
3176	158142c2	bellard	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3177	158142c2	bellard	bExpBigger:
3178	158142c2	bellard	if ( bExp == 0x7FF ) {
3179	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3180	158142c2	bellard	return packFloat64( zSign ^ 1, 0x7FF, 0 );
3181	158142c2	bellard	}
3182	158142c2	bellard	if ( aExp == 0 ) {
3183	158142c2	bellard	++expDiff;
3184	158142c2	bellard	}
3185	158142c2	bellard	else {
3186	158142c2	bellard	aSig \|= LIT64( 0x4000000000000000 );
3187	158142c2	bellard	}
3188	158142c2	bellard	shift64RightJamming( aSig, - expDiff, &aSig );
3189	158142c2	bellard	bSig \|= LIT64( 0x4000000000000000 );
3190	158142c2	bellard	bBigger:
3191	158142c2	bellard	zSig = bSig - aSig;
3192	158142c2	bellard	zExp = bExp;
3193	158142c2	bellard	zSign ^= 1;
3194	158142c2	bellard	goto normalizeRoundAndPack;
3195	158142c2	bellard	aExpBigger:
3196	158142c2	bellard	if ( aExp == 0x7FF ) {
3197	158142c2	bellard	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3198	158142c2	bellard	return a;
3199	158142c2	bellard	}
3200	158142c2	bellard	if ( bExp == 0 ) {
3201	158142c2	bellard	--expDiff;
3202	158142c2	bellard	}
3203	158142c2	bellard	else {
3204	158142c2	bellard	bSig \|= LIT64( 0x4000000000000000 );
3205	158142c2	bellard	}
3206	158142c2	bellard	shift64RightJamming( bSig, expDiff, &bSig );
3207	158142c2	bellard	aSig \|= LIT64( 0x4000000000000000 );
3208	158142c2	bellard	aBigger:
3209	158142c2	bellard	zSig = aSig - bSig;
3210	158142c2	bellard	zExp = aExp;
3211	158142c2	bellard	normalizeRoundAndPack:
3212	158142c2	bellard	--zExp;
3213	158142c2	bellard	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3214	158142c2	bellard
3215	158142c2	bellard	}
3216	158142c2	bellard
3217	158142c2	bellard	/*----------------------------------------------------------------------------
3218	158142c2	bellard	\| Returns the result of adding the double-precision floating-point values `a'
3219	158142c2	bellard	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
3220	158142c2	bellard	\| Binary Floating-Point Arithmetic.
3221	158142c2	bellard	----------------------------------------------------------------------------/
3222	158142c2	bellard
3223	158142c2	bellard	float64 float64_add( float64 a, float64 b STATUS_PARAM )
3224	158142c2	bellard	{
3225	158142c2	bellard	flag aSign, bSign;
3226	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3227	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3228	158142c2	bellard
3229	158142c2	bellard	aSign = extractFloat64Sign( a );
3230	158142c2	bellard	bSign = extractFloat64Sign( b );
3231	158142c2	bellard	if ( aSign == bSign ) {
3232	158142c2	bellard	return addFloat64Sigs( a, b, aSign STATUS_VAR );
3233	158142c2	bellard	}
3234	158142c2	bellard	else {
3235	158142c2	bellard	return subFloat64Sigs( a, b, aSign STATUS_VAR );
3236	158142c2	bellard	}
3237	158142c2	bellard
3238	158142c2	bellard	}
3239	158142c2	bellard
3240	158142c2	bellard	/*----------------------------------------------------------------------------
3241	158142c2	bellard	\| Returns the result of subtracting the double-precision floating-point values
3242	158142c2	bellard	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3243	158142c2	bellard	\| for Binary Floating-Point Arithmetic.
3244	158142c2	bellard	----------------------------------------------------------------------------/
3245	158142c2	bellard
3246	158142c2	bellard	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3247	158142c2	bellard	{
3248	158142c2	bellard	flag aSign, bSign;
3249	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3250	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3251	158142c2	bellard
3252	158142c2	bellard	aSign = extractFloat64Sign( a );
3253	158142c2	bellard	bSign = extractFloat64Sign( b );
3254	158142c2	bellard	if ( aSign == bSign ) {
3255	158142c2	bellard	return subFloat64Sigs( a, b, aSign STATUS_VAR );
3256	158142c2	bellard	}
3257	158142c2	bellard	else {
3258	158142c2	bellard	return addFloat64Sigs( a, b, aSign STATUS_VAR );
3259	158142c2	bellard	}
3260	158142c2	bellard
3261	158142c2	bellard	}
3262	158142c2	bellard
3263	158142c2	bellard	/*----------------------------------------------------------------------------
3264	158142c2	bellard	\| Returns the result of multiplying the double-precision floating-point values
3265	158142c2	bellard	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3266	158142c2	bellard	\| for Binary Floating-Point Arithmetic.
3267	158142c2	bellard	----------------------------------------------------------------------------/
3268	158142c2	bellard
3269	158142c2	bellard	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3270	158142c2	bellard	{
3271	158142c2	bellard	flag aSign, bSign, zSign;
3272	158142c2	bellard	int16 aExp, bExp, zExp;
3273	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig0, zSig1;
3274	158142c2	bellard
3275	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3276	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3277	37d18660	Peter Maydell
3278	158142c2	bellard	aSig = extractFloat64Frac( a );
3279	158142c2	bellard	aExp = extractFloat64Exp( a );
3280	158142c2	bellard	aSign = extractFloat64Sign( a );
3281	158142c2	bellard	bSig = extractFloat64Frac( b );
3282	158142c2	bellard	bExp = extractFloat64Exp( b );
3283	158142c2	bellard	bSign = extractFloat64Sign( b );
3284	158142c2	bellard	zSign = aSign ^ bSign;
3285	158142c2	bellard	if ( aExp == 0x7FF ) {
3286	158142c2	bellard	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
3287	158142c2	bellard	return propagateFloat64NaN( a, b STATUS_VAR );
3288	158142c2	bellard	}
3289	158142c2	bellard	if ( ( bExp \| bSig ) == 0 ) {
3290	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3291	158142c2	bellard	return float64_default_nan;
3292	158142c2	bellard	}
3293	158142c2	bellard	return packFloat64( zSign, 0x7FF, 0 );
3294	158142c2	bellard	}
3295	158142c2	bellard	if ( bExp == 0x7FF ) {
3296	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3297	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
3298	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3299	158142c2	bellard	return float64_default_nan;
3300	158142c2	bellard	}
3301	158142c2	bellard	return packFloat64( zSign, 0x7FF, 0 );
3302	158142c2	bellard	}
3303	158142c2	bellard	if ( aExp == 0 ) {
3304	158142c2	bellard	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3305	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3306	158142c2	bellard	}
3307	158142c2	bellard	if ( bExp == 0 ) {
3308	158142c2	bellard	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3309	158142c2	bellard	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3310	158142c2	bellard	}
3311	158142c2	bellard	zExp = aExp + bExp - 0x3FF;
3312	158142c2	bellard	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
3313	158142c2	bellard	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
3314	158142c2	bellard	mul64To128( aSig, bSig, &zSig0, &zSig1 );
3315	158142c2	bellard	zSig0 \|= ( zSig1 != 0 );
3316	bb98fe42	Andreas Färber	if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
3317	158142c2	bellard	zSig0 <<= 1;
3318	158142c2	bellard	--zExp;
3319	158142c2	bellard	}
3320	158142c2	bellard	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3321	158142c2	bellard
3322	158142c2	bellard	}
3323	158142c2	bellard
3324	158142c2	bellard	/*----------------------------------------------------------------------------
3325	158142c2	bellard	\| Returns the result of dividing the double-precision floating-point value `a'
3326	158142c2	bellard	\| by the corresponding value `b'. The operation is performed according to
3327	158142c2	bellard	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3328	158142c2	bellard	----------------------------------------------------------------------------/
3329	158142c2	bellard
3330	158142c2	bellard	float64 float64_div( float64 a, float64 b STATUS_PARAM )
3331	158142c2	bellard	{
3332	158142c2	bellard	flag aSign, bSign, zSign;
3333	158142c2	bellard	int16 aExp, bExp, zExp;
3334	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig;
3335	bb98fe42	Andreas Färber	uint64_t rem0, rem1;
3336	bb98fe42	Andreas Färber	uint64_t term0, term1;
3337	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3338	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3339	158142c2	bellard
3340	158142c2	bellard	aSig = extractFloat64Frac( a );
3341	158142c2	bellard	aExp = extractFloat64Exp( a );
3342	158142c2	bellard	aSign = extractFloat64Sign( a );
3343	158142c2	bellard	bSig = extractFloat64Frac( b );
3344	158142c2	bellard	bExp = extractFloat64Exp( b );
3345	158142c2	bellard	bSign = extractFloat64Sign( b );
3346	158142c2	bellard	zSign = aSign ^ bSign;
3347	158142c2	bellard	if ( aExp == 0x7FF ) {
3348	158142c2	bellard	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3349	158142c2	bellard	if ( bExp == 0x7FF ) {
3350	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3351	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3352	158142c2	bellard	return float64_default_nan;
3353	158142c2	bellard	}
3354	158142c2	bellard	return packFloat64( zSign, 0x7FF, 0 );
3355	158142c2	bellard	}
3356	158142c2	bellard	if ( bExp == 0x7FF ) {
3357	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3358	158142c2	bellard	return packFloat64( zSign, 0, 0 );
3359	158142c2	bellard	}
3360	158142c2	bellard	if ( bExp == 0 ) {
3361	158142c2	bellard	if ( bSig == 0 ) {
3362	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
3363	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3364	158142c2	bellard	return float64_default_nan;
3365	158142c2	bellard	}
3366	158142c2	bellard	float_raise( float_flag_divbyzero STATUS_VAR);
3367	158142c2	bellard	return packFloat64( zSign, 0x7FF, 0 );
3368	158142c2	bellard	}
3369	158142c2	bellard	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3370	158142c2	bellard	}
3371	158142c2	bellard	if ( aExp == 0 ) {
3372	158142c2	bellard	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3373	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3374	158142c2	bellard	}
3375	158142c2	bellard	zExp = aExp - bExp + 0x3FD;
3376	158142c2	bellard	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
3377	158142c2	bellard	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
3378	158142c2	bellard	if ( bSig <= ( aSig + aSig ) ) {
3379	158142c2	bellard	aSig >>= 1;
3380	158142c2	bellard	++zExp;
3381	158142c2	bellard	}
3382	158142c2	bellard	zSig = estimateDiv128To64( aSig, 0, bSig );
3383	158142c2	bellard	if ( ( zSig & 0x1FF ) <= 2 ) {
3384	158142c2	bellard	mul64To128( bSig, zSig, &term0, &term1 );
3385	158142c2	bellard	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3386	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
3387	158142c2	bellard	--zSig;
3388	158142c2	bellard	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3389	158142c2	bellard	}
3390	158142c2	bellard	zSig \|= ( rem1 != 0 );
3391	158142c2	bellard	}
3392	158142c2	bellard	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3393	158142c2	bellard
3394	158142c2	bellard	}
3395	158142c2	bellard
3396	158142c2	bellard	/*----------------------------------------------------------------------------
3397	158142c2	bellard	\| Returns the remainder of the double-precision floating-point value `a'
3398	158142c2	bellard	\| with respect to the corresponding value `b'. The operation is performed
3399	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3400	158142c2	bellard	----------------------------------------------------------------------------/
3401	158142c2	bellard
3402	158142c2	bellard	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3403	158142c2	bellard	{
3404	ed086f3d	Blue Swirl	flag aSign, zSign;
3405	158142c2	bellard	int16 aExp, bExp, expDiff;
3406	bb98fe42	Andreas Färber	uint64_t aSig, bSig;
3407	bb98fe42	Andreas Färber	uint64_t q, alternateASig;
3408	bb98fe42	Andreas Färber	int64_t sigMean;
3409	158142c2	bellard
3410	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3411	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3412	158142c2	bellard	aSig = extractFloat64Frac( a );
3413	158142c2	bellard	aExp = extractFloat64Exp( a );
3414	158142c2	bellard	aSign = extractFloat64Sign( a );
3415	158142c2	bellard	bSig = extractFloat64Frac( b );
3416	158142c2	bellard	bExp = extractFloat64Exp( b );
3417	158142c2	bellard	if ( aExp == 0x7FF ) {
3418	158142c2	bellard	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
3419	158142c2	bellard	return propagateFloat64NaN( a, b STATUS_VAR );
3420	158142c2	bellard	}
3421	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3422	158142c2	bellard	return float64_default_nan;
3423	158142c2	bellard	}
3424	158142c2	bellard	if ( bExp == 0x7FF ) {
3425	158142c2	bellard	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3426	158142c2	bellard	return a;
3427	158142c2	bellard	}
3428	158142c2	bellard	if ( bExp == 0 ) {
3429	158142c2	bellard	if ( bSig == 0 ) {
3430	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3431	158142c2	bellard	return float64_default_nan;
3432	158142c2	bellard	}
3433	158142c2	bellard	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3434	158142c2	bellard	}
3435	158142c2	bellard	if ( aExp == 0 ) {
3436	158142c2	bellard	if ( aSig == 0 ) return a;
3437	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3438	158142c2	bellard	}
3439	158142c2	bellard	expDiff = aExp - bExp;
3440	158142c2	bellard	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
3441	158142c2	bellard	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
3442	158142c2	bellard	if ( expDiff < 0 ) {
3443	158142c2	bellard	if ( expDiff < -1 ) return a;
3444	158142c2	bellard	aSig >>= 1;
3445	158142c2	bellard	}
3446	158142c2	bellard	q = ( bSig <= aSig );
3447	158142c2	bellard	if ( q ) aSig -= bSig;
3448	158142c2	bellard	expDiff -= 64;
3449	158142c2	bellard	while ( 0 < expDiff ) {
3450	158142c2	bellard	q = estimateDiv128To64( aSig, 0, bSig );
3451	158142c2	bellard	q = ( 2 < q ) ? q - 2 : 0;
3452	158142c2	bellard	aSig = - ( ( bSig>>2 ) * q );
3453	158142c2	bellard	expDiff -= 62;
3454	158142c2	bellard	}
3455	158142c2	bellard	expDiff += 64;
3456	158142c2	bellard	if ( 0 < expDiff ) {
3457	158142c2	bellard	q = estimateDiv128To64( aSig, 0, bSig );
3458	158142c2	bellard	q = ( 2 < q ) ? q - 2 : 0;
3459	158142c2	bellard	q >>= 64 - expDiff;
3460	158142c2	bellard	bSig >>= 2;
3461	158142c2	bellard	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3462	158142c2	bellard	}
3463	158142c2	bellard	else {
3464	158142c2	bellard	aSig >>= 2;
3465	158142c2	bellard	bSig >>= 2;
3466	158142c2	bellard	}
3467	158142c2	bellard	do {
3468	158142c2	bellard	alternateASig = aSig;
3469	158142c2	bellard	++q;
3470	158142c2	bellard	aSig -= bSig;
3471	bb98fe42	Andreas Färber	} while ( 0 <= (int64_t) aSig );
3472	158142c2	bellard	sigMean = aSig + alternateASig;
3473	158142c2	bellard	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3474	158142c2	bellard	aSig = alternateASig;
3475	158142c2	bellard	}
3476	bb98fe42	Andreas Färber	zSign = ( (int64_t) aSig < 0 );
3477	158142c2	bellard	if ( zSign ) aSig = - aSig;
3478	158142c2	bellard	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3479	158142c2	bellard
3480	158142c2	bellard	}
3481	158142c2	bellard
3482	158142c2	bellard	/*----------------------------------------------------------------------------
3483	158142c2	bellard	\| Returns the square root of the double-precision floating-point value `a'.
3484	158142c2	bellard	\| The operation is performed according to the IEC/IEEE Standard for Binary
3485	158142c2	bellard	\| Floating-Point Arithmetic.
3486	158142c2	bellard	----------------------------------------------------------------------------/
3487	158142c2	bellard
3488	158142c2	bellard	float64 float64_sqrt( float64 a STATUS_PARAM )
3489	158142c2	bellard	{
3490	158142c2	bellard	flag aSign;
3491	158142c2	bellard	int16 aExp, zExp;
3492	bb98fe42	Andreas Färber	uint64_t aSig, zSig, doubleZSig;
3493	bb98fe42	Andreas Färber	uint64_t rem0, rem1, term0, term1;
3494	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3495	158142c2	bellard
3496	158142c2	bellard	aSig = extractFloat64Frac( a );
3497	158142c2	bellard	aExp = extractFloat64Exp( a );
3498	158142c2	bellard	aSign = extractFloat64Sign( a );
3499	158142c2	bellard	if ( aExp == 0x7FF ) {
3500	158142c2	bellard	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3501	158142c2	bellard	if ( ! aSign ) return a;
3502	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3503	158142c2	bellard	return float64_default_nan;
3504	158142c2	bellard	}
3505	158142c2	bellard	if ( aSign ) {
3506	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) return a;
3507	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3508	158142c2	bellard	return float64_default_nan;
3509	158142c2	bellard	}
3510	158142c2	bellard	if ( aExp == 0 ) {
3511	f090c9d4	pbrook	if ( aSig == 0 ) return float64_zero;
3512	158142c2	bellard	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3513	158142c2	bellard	}
3514	158142c2	bellard	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3515	158142c2	bellard	aSig \|= LIT64( 0x0010000000000000 );
3516	158142c2	bellard	zSig = estimateSqrt32( aExp, aSig>>21 );
3517	158142c2	bellard	aSig <<= 9 - ( aExp & 1 );
3518	158142c2	bellard	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3519	158142c2	bellard	if ( ( zSig & 0x1FF ) <= 5 ) {
3520	158142c2	bellard	doubleZSig = zSig<<1;
3521	158142c2	bellard	mul64To128( zSig, zSig, &term0, &term1 );
3522	158142c2	bellard	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3523	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
3524	158142c2	bellard	--zSig;
3525	158142c2	bellard	doubleZSig -= 2;
3526	158142c2	bellard	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
3527	158142c2	bellard	}
3528	158142c2	bellard	zSig \|= ( ( rem0 \| rem1 ) != 0 );
3529	158142c2	bellard	}
3530	158142c2	bellard	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3531	158142c2	bellard
3532	158142c2	bellard	}
3533	158142c2	bellard
3534	158142c2	bellard	/*----------------------------------------------------------------------------
3535	374dfc33	aurel32	\| Returns the binary log of the double-precision floating-point value `a'.
3536	374dfc33	aurel32	\| The operation is performed according to the IEC/IEEE Standard for Binary
3537	374dfc33	aurel32	\| Floating-Point Arithmetic.
3538	374dfc33	aurel32	----------------------------------------------------------------------------/
3539	374dfc33	aurel32	float64 float64_log2( float64 a STATUS_PARAM )
3540	374dfc33	aurel32	{
3541	374dfc33	aurel32	flag aSign, zSign;
3542	374dfc33	aurel32	int16 aExp;
3543	bb98fe42	Andreas Färber	uint64_t aSig, aSig0, aSig1, zSig, i;
3544	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3545	374dfc33	aurel32
3546	374dfc33	aurel32	aSig = extractFloat64Frac( a );
3547	374dfc33	aurel32	aExp = extractFloat64Exp( a );
3548	374dfc33	aurel32	aSign = extractFloat64Sign( a );
3549	374dfc33	aurel32
3550	374dfc33	aurel32	if ( aExp == 0 ) {
3551	374dfc33	aurel32	if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3552	374dfc33	aurel32	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3553	374dfc33	aurel32	}
3554	374dfc33	aurel32	if ( aSign ) {
3555	374dfc33	aurel32	float_raise( float_flag_invalid STATUS_VAR);
3556	374dfc33	aurel32	return float64_default_nan;
3557	374dfc33	aurel32	}
3558	374dfc33	aurel32	if ( aExp == 0x7FF ) {
3559	374dfc33	aurel32	if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3560	374dfc33	aurel32	return a;
3561	374dfc33	aurel32	}
3562	374dfc33	aurel32
3563	374dfc33	aurel32	aExp -= 0x3FF;
3564	374dfc33	aurel32	aSig \|= LIT64( 0x0010000000000000 );
3565	374dfc33	aurel32	zSign = aExp < 0;
3566	bb98fe42	Andreas Färber	zSig = (uint64_t)aExp << 52;
3567	374dfc33	aurel32	for (i = 1LL << 51; i > 0; i >>= 1) {
3568	374dfc33	aurel32	mul64To128( aSig, aSig, &aSig0, &aSig1 );
3569	374dfc33	aurel32	aSig = ( aSig0 << 12 ) \| ( aSig1 >> 52 );
3570	374dfc33	aurel32	if ( aSig & LIT64( 0x0020000000000000 ) ) {
3571	374dfc33	aurel32	aSig >>= 1;
3572	374dfc33	aurel32	zSig \|= i;
3573	374dfc33	aurel32	}
3574	374dfc33	aurel32	}
3575	374dfc33	aurel32
3576	374dfc33	aurel32	if ( zSign )
3577	374dfc33	aurel32	zSig = -zSig;
3578	374dfc33	aurel32	return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3579	374dfc33	aurel32	}
3580	374dfc33	aurel32
3581	374dfc33	aurel32	/*----------------------------------------------------------------------------
3582	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is equal to the
3583	b689362d	Aurelien Jarno	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
3584	b689362d	Aurelien Jarno	\| if either operand is a NaN. Otherwise, the comparison is performed
3585	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3586	158142c2	bellard	----------------------------------------------------------------------------/
3587	158142c2	bellard
3588	b689362d	Aurelien Jarno	int float64_eq( float64 a, float64 b STATUS_PARAM )
3589	158142c2	bellard	{
3590	bb98fe42	Andreas Färber	uint64_t av, bv;
3591	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3592	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3593	158142c2	bellard
3594	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3595	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3596	158142c2	bellard	) {
3597	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
3598	158142c2	bellard	return 0;
3599	158142c2	bellard	}
3600	f090c9d4	pbrook	av = float64_val(a);
3601	a1b91bb4	pbrook	bv = float64_val(b);
3602	bb98fe42	Andreas Färber	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
3603	158142c2	bellard
3604	158142c2	bellard	}
3605	158142c2	bellard
3606	158142c2	bellard	/*----------------------------------------------------------------------------
3607	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is less than or
3608	f5a64251	Aurelien Jarno	\| equal to the corresponding value `b', and 0 otherwise. The invalid
3609	f5a64251	Aurelien Jarno	\| exception is raised if either operand is a NaN. The comparison is performed
3610	f5a64251	Aurelien Jarno	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3611	158142c2	bellard	----------------------------------------------------------------------------/
3612	158142c2	bellard
3613	750afe93	bellard	int float64_le( float64 a, float64 b STATUS_PARAM )
3614	158142c2	bellard	{
3615	158142c2	bellard	flag aSign, bSign;
3616	bb98fe42	Andreas Färber	uint64_t av, bv;
3617	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3618	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3619	158142c2	bellard
3620	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3621	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3622	158142c2	bellard	) {
3623	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3624	158142c2	bellard	return 0;
3625	158142c2	bellard	}
3626	158142c2	bellard	aSign = extractFloat64Sign( a );
3627	158142c2	bellard	bSign = extractFloat64Sign( b );
3628	f090c9d4	pbrook	av = float64_val(a);
3629	a1b91bb4	pbrook	bv = float64_val(b);
3630	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
3631	f090c9d4	pbrook	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
3632	158142c2	bellard
3633	158142c2	bellard	}
3634	158142c2	bellard
3635	158142c2	bellard	/*----------------------------------------------------------------------------
3636	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is less than
3637	f5a64251	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. The invalid exception is
3638	f5a64251	Aurelien Jarno	\| raised if either operand is a NaN. The comparison is performed according
3639	f5a64251	Aurelien Jarno	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3640	158142c2	bellard	----------------------------------------------------------------------------/
3641	158142c2	bellard
3642	750afe93	bellard	int float64_lt( float64 a, float64 b STATUS_PARAM )
3643	158142c2	bellard	{
3644	158142c2	bellard	flag aSign, bSign;
3645	bb98fe42	Andreas Färber	uint64_t av, bv;
3646	158142c2	bellard
3647	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3648	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3649	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3650	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3651	158142c2	bellard	) {
3652	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3653	158142c2	bellard	return 0;
3654	158142c2	bellard	}
3655	158142c2	bellard	aSign = extractFloat64Sign( a );
3656	158142c2	bellard	bSign = extractFloat64Sign( b );
3657	f090c9d4	pbrook	av = float64_val(a);
3658	a1b91bb4	pbrook	bv = float64_val(b);
3659	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
3660	f090c9d4	pbrook	return ( av != bv ) && ( aSign ^ ( av < bv ) );
3661	158142c2	bellard
3662	158142c2	bellard	}
3663	158142c2	bellard
3664	158142c2	bellard	/*----------------------------------------------------------------------------
3665	67b7861d	Aurelien Jarno	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3666	f5a64251	Aurelien Jarno	\| be compared, and 0 otherwise. The invalid exception is raised if either
3667	f5a64251	Aurelien Jarno	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
3668	f5a64251	Aurelien Jarno	\| Standard for Binary Floating-Point Arithmetic.
3669	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
3670	67b7861d	Aurelien Jarno
3671	67b7861d	Aurelien Jarno	int float64_unordered( float64 a, float64 b STATUS_PARAM )
3672	67b7861d	Aurelien Jarno	{
3673	67b7861d	Aurelien Jarno	a = float64_squash_input_denormal(a STATUS_VAR);
3674	67b7861d	Aurelien Jarno	b = float64_squash_input_denormal(b STATUS_VAR);
3675	67b7861d	Aurelien Jarno
3676	67b7861d	Aurelien Jarno	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3677	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3678	67b7861d	Aurelien Jarno	) {
3679	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
3680	67b7861d	Aurelien Jarno	return 1;
3681	67b7861d	Aurelien Jarno	}
3682	67b7861d	Aurelien Jarno	return 0;
3683	67b7861d	Aurelien Jarno	}
3684	67b7861d	Aurelien Jarno
3685	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
3686	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is equal to the
3687	f5a64251	Aurelien Jarno	\| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3688	f5a64251	Aurelien Jarno	\| exception.The comparison is performed according to the IEC/IEEE Standard
3689	f5a64251	Aurelien Jarno	\| for Binary Floating-Point Arithmetic.
3690	158142c2	bellard	----------------------------------------------------------------------------/
3691	158142c2	bellard
3692	b689362d	Aurelien Jarno	int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
3693	158142c2	bellard	{
3694	bb98fe42	Andreas Färber	uint64_t av, bv;
3695	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3696	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3697	158142c2	bellard
3698	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3699	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3700	158142c2	bellard	) {
3701	b689362d	Aurelien Jarno	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
3702	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
3703	b689362d	Aurelien Jarno	}
3704	158142c2	bellard	return 0;
3705	158142c2	bellard	}
3706	f090c9d4	pbrook	av = float64_val(a);
3707	a1b91bb4	pbrook	bv = float64_val(b);
3708	bb98fe42	Andreas Färber	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
3709	158142c2	bellard
3710	158142c2	bellard	}
3711	158142c2	bellard
3712	158142c2	bellard	/*----------------------------------------------------------------------------
3713	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is less than or
3714	158142c2	bellard	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3715	158142c2	bellard	\| cause an exception. Otherwise, the comparison is performed according to the
3716	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3717	158142c2	bellard	----------------------------------------------------------------------------/
3718	158142c2	bellard
3719	750afe93	bellard	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3720	158142c2	bellard	{
3721	158142c2	bellard	flag aSign, bSign;
3722	bb98fe42	Andreas Färber	uint64_t av, bv;
3723	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3724	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3725	158142c2	bellard
3726	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3727	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3728	158142c2	bellard	) {
3729	158142c2	bellard	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
3730	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3731	158142c2	bellard	}
3732	158142c2	bellard	return 0;
3733	158142c2	bellard	}
3734	158142c2	bellard	aSign = extractFloat64Sign( a );
3735	158142c2	bellard	bSign = extractFloat64Sign( b );
3736	f090c9d4	pbrook	av = float64_val(a);
3737	a1b91bb4	pbrook	bv = float64_val(b);
3738	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
3739	f090c9d4	pbrook	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
3740	158142c2	bellard
3741	158142c2	bellard	}
3742	158142c2	bellard
3743	158142c2	bellard	/*----------------------------------------------------------------------------
3744	158142c2	bellard	\| Returns 1 if the double-precision floating-point value `a' is less than
3745	158142c2	bellard	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3746	158142c2	bellard	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
3747	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
3748	158142c2	bellard	----------------------------------------------------------------------------/
3749	158142c2	bellard
3750	750afe93	bellard	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3751	158142c2	bellard	{
3752	158142c2	bellard	flag aSign, bSign;
3753	bb98fe42	Andreas Färber	uint64_t av, bv;
3754	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
3755	37d18660	Peter Maydell	b = float64_squash_input_denormal(b STATUS_VAR);
3756	158142c2	bellard
3757	158142c2	bellard	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3758	158142c2	bellard	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3759	158142c2	bellard	) {
3760	158142c2	bellard	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
3761	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3762	158142c2	bellard	}
3763	158142c2	bellard	return 0;
3764	158142c2	bellard	}
3765	158142c2	bellard	aSign = extractFloat64Sign( a );
3766	158142c2	bellard	bSign = extractFloat64Sign( b );
3767	f090c9d4	pbrook	av = float64_val(a);
3768	a1b91bb4	pbrook	bv = float64_val(b);
3769	bb98fe42	Andreas Färber	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
3770	f090c9d4	pbrook	return ( av != bv ) && ( aSign ^ ( av < bv ) );
3771	158142c2	bellard
3772	158142c2	bellard	}
3773	158142c2	bellard
3774	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
3775	67b7861d	Aurelien Jarno	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3776	67b7861d	Aurelien Jarno	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3777	67b7861d	Aurelien Jarno	\| comparison is performed according to the IEC/IEEE Standard for Binary
3778	67b7861d	Aurelien Jarno	\| Floating-Point Arithmetic.
3779	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
3780	67b7861d	Aurelien Jarno
3781	67b7861d	Aurelien Jarno	int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
3782	67b7861d	Aurelien Jarno	{
3783	67b7861d	Aurelien Jarno	a = float64_squash_input_denormal(a STATUS_VAR);
3784	67b7861d	Aurelien Jarno	b = float64_squash_input_denormal(b STATUS_VAR);
3785	67b7861d	Aurelien Jarno
3786	67b7861d	Aurelien Jarno	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3787	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3788	67b7861d	Aurelien Jarno	) {
3789	67b7861d	Aurelien Jarno	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
3790	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
3791	67b7861d	Aurelien Jarno	}
3792	67b7861d	Aurelien Jarno	return 1;
3793	67b7861d	Aurelien Jarno	}
3794	67b7861d	Aurelien Jarno	return 0;
3795	67b7861d	Aurelien Jarno	}
3796	67b7861d	Aurelien Jarno
3797	158142c2	bellard	#ifdef FLOATX80
3798	158142c2	bellard
3799	158142c2	bellard	/*----------------------------------------------------------------------------
3800	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3801	158142c2	bellard	\| point value `a' to the 32-bit two's complement integer format. The
3802	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3803	158142c2	bellard	\| Floating-Point Arithmetic---which means in particular that the conversion
3804	158142c2	bellard	\| is rounded according to the current rounding mode. If `a' is a NaN, the
3805	158142c2	bellard	\| largest positive integer is returned. Otherwise, if the conversion
3806	158142c2	bellard	\| overflows, the largest integer with the same sign as `a' is returned.
3807	158142c2	bellard	----------------------------------------------------------------------------/
3808	158142c2	bellard
3809	158142c2	bellard	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
3810	158142c2	bellard	{
3811	158142c2	bellard	flag aSign;
3812	158142c2	bellard	int32 aExp, shiftCount;
3813	bb98fe42	Andreas Färber	uint64_t aSig;
3814	158142c2	bellard
3815	158142c2	bellard	aSig = extractFloatx80Frac( a );
3816	158142c2	bellard	aExp = extractFloatx80Exp( a );
3817	158142c2	bellard	aSign = extractFloatx80Sign( a );
3818	bb98fe42	Andreas Färber	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
3819	158142c2	bellard	shiftCount = 0x4037 - aExp;
3820	158142c2	bellard	if ( shiftCount <= 0 ) shiftCount = 1;
3821	158142c2	bellard	shift64RightJamming( aSig, shiftCount, &aSig );
3822	158142c2	bellard	return roundAndPackInt32( aSign, aSig STATUS_VAR );
3823	158142c2	bellard
3824	158142c2	bellard	}
3825	158142c2	bellard
3826	158142c2	bellard	/*----------------------------------------------------------------------------
3827	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3828	158142c2	bellard	\| point value `a' to the 32-bit two's complement integer format. The
3829	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3830	158142c2	bellard	\| Floating-Point Arithmetic, except that the conversion is always rounded
3831	158142c2	bellard	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
3832	158142c2	bellard	\| Otherwise, if the conversion overflows, the largest integer with the same
3833	158142c2	bellard	\| sign as `a' is returned.
3834	158142c2	bellard	----------------------------------------------------------------------------/
3835	158142c2	bellard
3836	158142c2	bellard	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
3837	158142c2	bellard	{
3838	158142c2	bellard	flag aSign;
3839	158142c2	bellard	int32 aExp, shiftCount;
3840	bb98fe42	Andreas Färber	uint64_t aSig, savedASig;
3841	158142c2	bellard	int32 z;
3842	158142c2	bellard
3843	158142c2	bellard	aSig = extractFloatx80Frac( a );
3844	158142c2	bellard	aExp = extractFloatx80Exp( a );
3845	158142c2	bellard	aSign = extractFloatx80Sign( a );
3846	158142c2	bellard	if ( 0x401E < aExp ) {
3847	bb98fe42	Andreas Färber	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
3848	158142c2	bellard	goto invalid;
3849	158142c2	bellard	}
3850	158142c2	bellard	else if ( aExp < 0x3FFF ) {
3851	158142c2	bellard	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
3852	158142c2	bellard	return 0;
3853	158142c2	bellard	}
3854	158142c2	bellard	shiftCount = 0x403E - aExp;
3855	158142c2	bellard	savedASig = aSig;
3856	158142c2	bellard	aSig >>= shiftCount;
3857	158142c2	bellard	z = aSig;
3858	158142c2	bellard	if ( aSign ) z = - z;
3859	158142c2	bellard	if ( ( z < 0 ) ^ aSign ) {
3860	158142c2	bellard	invalid:
3861	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3862	bb98fe42	Andreas Färber	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3863	158142c2	bellard	}
3864	158142c2	bellard	if ( ( aSig<<shiftCount ) != savedASig ) {
3865	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
3866	158142c2	bellard	}
3867	158142c2	bellard	return z;
3868	158142c2	bellard
3869	158142c2	bellard	}
3870	158142c2	bellard
3871	158142c2	bellard	/*----------------------------------------------------------------------------
3872	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3873	158142c2	bellard	\| point value `a' to the 64-bit two's complement integer format. The
3874	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3875	158142c2	bellard	\| Floating-Point Arithmetic---which means in particular that the conversion
3876	158142c2	bellard	\| is rounded according to the current rounding mode. If `a' is a NaN,
3877	158142c2	bellard	\| the largest positive integer is returned. Otherwise, if the conversion
3878	158142c2	bellard	\| overflows, the largest integer with the same sign as `a' is returned.
3879	158142c2	bellard	----------------------------------------------------------------------------/
3880	158142c2	bellard
3881	158142c2	bellard	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
3882	158142c2	bellard	{
3883	158142c2	bellard	flag aSign;
3884	158142c2	bellard	int32 aExp, shiftCount;
3885	bb98fe42	Andreas Färber	uint64_t aSig, aSigExtra;
3886	158142c2	bellard
3887	158142c2	bellard	aSig = extractFloatx80Frac( a );
3888	158142c2	bellard	aExp = extractFloatx80Exp( a );
3889	158142c2	bellard	aSign = extractFloatx80Sign( a );
3890	158142c2	bellard	shiftCount = 0x403E - aExp;
3891	158142c2	bellard	if ( shiftCount <= 0 ) {
3892	158142c2	bellard	if ( shiftCount ) {
3893	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3894	158142c2	bellard	if ( ! aSign
3895	158142c2	bellard	\|\| ( ( aExp == 0x7FFF )
3896	158142c2	bellard	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
3897	158142c2	bellard	) {
3898	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
3899	158142c2	bellard	}
3900	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
3901	158142c2	bellard	}
3902	158142c2	bellard	aSigExtra = 0;
3903	158142c2	bellard	}
3904	158142c2	bellard	else {
3905	158142c2	bellard	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3906	158142c2	bellard	}
3907	158142c2	bellard	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3908	158142c2	bellard
3909	158142c2	bellard	}
3910	158142c2	bellard
3911	158142c2	bellard	/*----------------------------------------------------------------------------
3912	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3913	158142c2	bellard	\| point value `a' to the 64-bit two's complement integer format. The
3914	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3915	158142c2	bellard	\| Floating-Point Arithmetic, except that the conversion is always rounded
3916	158142c2	bellard	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
3917	158142c2	bellard	\| Otherwise, if the conversion overflows, the largest integer with the same
3918	158142c2	bellard	\| sign as `a' is returned.
3919	158142c2	bellard	----------------------------------------------------------------------------/
3920	158142c2	bellard
3921	158142c2	bellard	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
3922	158142c2	bellard	{
3923	158142c2	bellard	flag aSign;
3924	158142c2	bellard	int32 aExp, shiftCount;
3925	bb98fe42	Andreas Färber	uint64_t aSig;
3926	158142c2	bellard	int64 z;
3927	158142c2	bellard
3928	158142c2	bellard	aSig = extractFloatx80Frac( a );
3929	158142c2	bellard	aExp = extractFloatx80Exp( a );
3930	158142c2	bellard	aSign = extractFloatx80Sign( a );
3931	158142c2	bellard	shiftCount = aExp - 0x403E;
3932	158142c2	bellard	if ( 0 <= shiftCount ) {
3933	158142c2	bellard	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3934	158142c2	bellard	if ( ( a.high != 0xC03E ) \|\| aSig ) {
3935	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
3936	158142c2	bellard	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
3937	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
3938	158142c2	bellard	}
3939	158142c2	bellard	}
3940	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
3941	158142c2	bellard	}
3942	158142c2	bellard	else if ( aExp < 0x3FFF ) {
3943	158142c2	bellard	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
3944	158142c2	bellard	return 0;
3945	158142c2	bellard	}
3946	158142c2	bellard	z = aSig>>( - shiftCount );
3947	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3948	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
3949	158142c2	bellard	}
3950	158142c2	bellard	if ( aSign ) z = - z;
3951	158142c2	bellard	return z;
3952	158142c2	bellard
3953	158142c2	bellard	}
3954	158142c2	bellard
3955	158142c2	bellard	/*----------------------------------------------------------------------------
3956	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3957	158142c2	bellard	\| point value `a' to the single-precision floating-point format. The
3958	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3959	158142c2	bellard	\| Floating-Point Arithmetic.
3960	158142c2	bellard	----------------------------------------------------------------------------/
3961	158142c2	bellard
3962	158142c2	bellard	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
3963	158142c2	bellard	{
3964	158142c2	bellard	flag aSign;
3965	158142c2	bellard	int32 aExp;
3966	bb98fe42	Andreas Färber	uint64_t aSig;
3967	158142c2	bellard
3968	158142c2	bellard	aSig = extractFloatx80Frac( a );
3969	158142c2	bellard	aExp = extractFloatx80Exp( a );
3970	158142c2	bellard	aSign = extractFloatx80Sign( a );
3971	158142c2	bellard	if ( aExp == 0x7FFF ) {
3972	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 ) ) {
3973	bcd4d9af	Christophe Lyon	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3974	158142c2	bellard	}
3975	158142c2	bellard	return packFloat32( aSign, 0xFF, 0 );
3976	158142c2	bellard	}
3977	158142c2	bellard	shift64RightJamming( aSig, 33, &aSig );
3978	158142c2	bellard	if ( aExp \|\| aSig ) aExp -= 0x3F81;
3979	158142c2	bellard	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
3980	158142c2	bellard
3981	158142c2	bellard	}
3982	158142c2	bellard
3983	158142c2	bellard	/*----------------------------------------------------------------------------
3984	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
3985	158142c2	bellard	\| point value `a' to the double-precision floating-point format. The
3986	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
3987	158142c2	bellard	\| Floating-Point Arithmetic.
3988	158142c2	bellard	----------------------------------------------------------------------------/
3989	158142c2	bellard
3990	158142c2	bellard	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
3991	158142c2	bellard	{
3992	158142c2	bellard	flag aSign;
3993	158142c2	bellard	int32 aExp;
3994	bb98fe42	Andreas Färber	uint64_t aSig, zSig;
3995	158142c2	bellard
3996	158142c2	bellard	aSig = extractFloatx80Frac( a );
3997	158142c2	bellard	aExp = extractFloatx80Exp( a );
3998	158142c2	bellard	aSign = extractFloatx80Sign( a );
3999	158142c2	bellard	if ( aExp == 0x7FFF ) {
4000	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 ) ) {
4001	bcd4d9af	Christophe Lyon	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
4002	158142c2	bellard	}
4003	158142c2	bellard	return packFloat64( aSign, 0x7FF, 0 );
4004	158142c2	bellard	}
4005	158142c2	bellard	shift64RightJamming( aSig, 1, &zSig );
4006	158142c2	bellard	if ( aExp \|\| aSig ) aExp -= 0x3C01;
4007	158142c2	bellard	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
4008	158142c2	bellard
4009	158142c2	bellard	}
4010	158142c2	bellard
4011	158142c2	bellard	#ifdef FLOAT128
4012	158142c2	bellard
4013	158142c2	bellard	/*----------------------------------------------------------------------------
4014	158142c2	bellard	\| Returns the result of converting the extended double-precision floating-
4015	158142c2	bellard	\| point value `a' to the quadruple-precision floating-point format. The
4016	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
4017	158142c2	bellard	\| Floating-Point Arithmetic.
4018	158142c2	bellard	----------------------------------------------------------------------------/
4019	158142c2	bellard
4020	158142c2	bellard	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
4021	158142c2	bellard	{
4022	158142c2	bellard	flag aSign;
4023	158142c2	bellard	int16 aExp;
4024	bb98fe42	Andreas Färber	uint64_t aSig, zSig0, zSig1;
4025	158142c2	bellard
4026	158142c2	bellard	aSig = extractFloatx80Frac( a );
4027	158142c2	bellard	aExp = extractFloatx80Exp( a );
4028	158142c2	bellard	aSign = extractFloatx80Sign( a );
4029	bb98fe42	Andreas Färber	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
4030	bcd4d9af	Christophe Lyon	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
4031	158142c2	bellard	}
4032	158142c2	bellard	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4033	158142c2	bellard	return packFloat128( aSign, aExp, zSig0, zSig1 );
4034	158142c2	bellard
4035	158142c2	bellard	}
4036	158142c2	bellard
4037	158142c2	bellard	#endif
4038	158142c2	bellard
4039	158142c2	bellard	/*----------------------------------------------------------------------------
4040	158142c2	bellard	\| Rounds the extended double-precision floating-point value `a' to an integer,
4041	158142c2	bellard	\| and returns the result as an extended quadruple-precision floating-point
4042	158142c2	bellard	\| value. The operation is performed according to the IEC/IEEE Standard for
4043	158142c2	bellard	\| Binary Floating-Point Arithmetic.
4044	158142c2	bellard	----------------------------------------------------------------------------/
4045	158142c2	bellard
4046	158142c2	bellard	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
4047	158142c2	bellard	{
4048	158142c2	bellard	flag aSign;
4049	158142c2	bellard	int32 aExp;
4050	bb98fe42	Andreas Färber	uint64_t lastBitMask, roundBitsMask;
4051	158142c2	bellard	int8 roundingMode;
4052	158142c2	bellard	floatx80 z;
4053	158142c2	bellard
4054	158142c2	bellard	aExp = extractFloatx80Exp( a );
4055	158142c2	bellard	if ( 0x403E <= aExp ) {
4056	bb98fe42	Andreas Färber	if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
4057	158142c2	bellard	return propagateFloatx80NaN( a, a STATUS_VAR );
4058	158142c2	bellard	}
4059	158142c2	bellard	return a;
4060	158142c2	bellard	}
4061	158142c2	bellard	if ( aExp < 0x3FFF ) {
4062	158142c2	bellard	if ( ( aExp == 0 )
4063	bb98fe42	Andreas Färber	&& ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
4064	158142c2	bellard	return a;
4065	158142c2	bellard	}
4066	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
4067	158142c2	bellard	aSign = extractFloatx80Sign( a );
4068	158142c2	bellard	switch ( STATUS(float_rounding_mode) ) {
4069	158142c2	bellard	case float_round_nearest_even:
4070	bb98fe42	Andreas Färber	if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
4071	158142c2	bellard	) {
4072	158142c2	bellard	return
4073	158142c2	bellard	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4074	158142c2	bellard	}
4075	158142c2	bellard	break;
4076	158142c2	bellard	case float_round_down:
4077	158142c2	bellard	return
4078	158142c2	bellard	aSign ?
4079	158142c2	bellard	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4080	158142c2	bellard	: packFloatx80( 0, 0, 0 );
4081	158142c2	bellard	case float_round_up:
4082	158142c2	bellard	return
4083	158142c2	bellard	aSign ? packFloatx80( 1, 0, 0 )
4084	158142c2	bellard	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4085	158142c2	bellard	}
4086	158142c2	bellard	return packFloatx80( aSign, 0, 0 );
4087	158142c2	bellard	}
4088	158142c2	bellard	lastBitMask = 1;
4089	158142c2	bellard	lastBitMask <<= 0x403E - aExp;
4090	158142c2	bellard	roundBitsMask = lastBitMask - 1;
4091	158142c2	bellard	z = a;
4092	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
4093	158142c2	bellard	if ( roundingMode == float_round_nearest_even ) {
4094	158142c2	bellard	z.low += lastBitMask>>1;
4095	158142c2	bellard	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4096	158142c2	bellard	}
4097	158142c2	bellard	else if ( roundingMode != float_round_to_zero ) {
4098	158142c2	bellard	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
4099	158142c2	bellard	z.low += roundBitsMask;
4100	158142c2	bellard	}
4101	158142c2	bellard	}
4102	158142c2	bellard	z.low &= ~ roundBitsMask;
4103	158142c2	bellard	if ( z.low == 0 ) {
4104	158142c2	bellard	++z.high;
4105	158142c2	bellard	z.low = LIT64( 0x8000000000000000 );
4106	158142c2	bellard	}
4107	158142c2	bellard	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
4108	158142c2	bellard	return z;
4109	158142c2	bellard
4110	158142c2	bellard	}
4111	158142c2	bellard
4112	158142c2	bellard	/*----------------------------------------------------------------------------
4113	158142c2	bellard	\| Returns the result of adding the absolute values of the extended double-
4114	158142c2	bellard	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4115	158142c2	bellard	\| negated before being returned. `zSign' is ignored if the result is a NaN.
4116	158142c2	bellard	\| The addition is performed according to the IEC/IEEE Standard for Binary
4117	158142c2	bellard	\| Floating-Point Arithmetic.
4118	158142c2	bellard	----------------------------------------------------------------------------/
4119	158142c2	bellard
4120	158142c2	bellard	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
4121	158142c2	bellard	{
4122	158142c2	bellard	int32 aExp, bExp, zExp;
4123	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig0, zSig1;
4124	158142c2	bellard	int32 expDiff;
4125	158142c2	bellard
4126	158142c2	bellard	aSig = extractFloatx80Frac( a );
4127	158142c2	bellard	aExp = extractFloatx80Exp( a );
4128	158142c2	bellard	bSig = extractFloatx80Frac( b );
4129	158142c2	bellard	bExp = extractFloatx80Exp( b );
4130	158142c2	bellard	expDiff = aExp - bExp;
4131	158142c2	bellard	if ( 0 < expDiff ) {
4132	158142c2	bellard	if ( aExp == 0x7FFF ) {
4133	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4134	158142c2	bellard	return a;
4135	158142c2	bellard	}
4136	158142c2	bellard	if ( bExp == 0 ) --expDiff;
4137	158142c2	bellard	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4138	158142c2	bellard	zExp = aExp;
4139	158142c2	bellard	}
4140	158142c2	bellard	else if ( expDiff < 0 ) {
4141	158142c2	bellard	if ( bExp == 0x7FFF ) {
4142	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4143	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4144	158142c2	bellard	}
4145	158142c2	bellard	if ( aExp == 0 ) ++expDiff;
4146	158142c2	bellard	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4147	158142c2	bellard	zExp = bExp;
4148	158142c2	bellard	}
4149	158142c2	bellard	else {
4150	158142c2	bellard	if ( aExp == 0x7FFF ) {
4151	bb98fe42	Andreas Färber	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
4152	158142c2	bellard	return propagateFloatx80NaN( a, b STATUS_VAR );
4153	158142c2	bellard	}
4154	158142c2	bellard	return a;
4155	158142c2	bellard	}
4156	158142c2	bellard	zSig1 = 0;
4157	158142c2	bellard	zSig0 = aSig + bSig;
4158	158142c2	bellard	if ( aExp == 0 ) {
4159	158142c2	bellard	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4160	158142c2	bellard	goto roundAndPack;
4161	158142c2	bellard	}
4162	158142c2	bellard	zExp = aExp;
4163	158142c2	bellard	goto shiftRight1;
4164	158142c2	bellard	}
4165	158142c2	bellard	zSig0 = aSig + bSig;
4166	bb98fe42	Andreas Färber	if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
4167	158142c2	bellard	shiftRight1:
4168	158142c2	bellard	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4169	158142c2	bellard	zSig0 \|= LIT64( 0x8000000000000000 );
4170	158142c2	bellard	++zExp;
4171	158142c2	bellard	roundAndPack:
4172	158142c2	bellard	return
4173	158142c2	bellard	roundAndPackFloatx80(
4174	158142c2	bellard	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4175	158142c2	bellard
4176	158142c2	bellard	}
4177	158142c2	bellard
4178	158142c2	bellard	/*----------------------------------------------------------------------------
4179	158142c2	bellard	\| Returns the result of subtracting the absolute values of the extended
4180	158142c2	bellard	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4181	158142c2	bellard	\| difference is negated before being returned. `zSign' is ignored if the
4182	158142c2	bellard	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
4183	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
4184	158142c2	bellard	----------------------------------------------------------------------------/
4185	158142c2	bellard
4186	158142c2	bellard	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4187	158142c2	bellard	{
4188	158142c2	bellard	int32 aExp, bExp, zExp;
4189	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig0, zSig1;
4190	158142c2	bellard	int32 expDiff;
4191	158142c2	bellard	floatx80 z;
4192	158142c2	bellard
4193	158142c2	bellard	aSig = extractFloatx80Frac( a );
4194	158142c2	bellard	aExp = extractFloatx80Exp( a );
4195	158142c2	bellard	bSig = extractFloatx80Frac( b );
4196	158142c2	bellard	bExp = extractFloatx80Exp( b );
4197	158142c2	bellard	expDiff = aExp - bExp;
4198	158142c2	bellard	if ( 0 < expDiff ) goto aExpBigger;
4199	158142c2	bellard	if ( expDiff < 0 ) goto bExpBigger;
4200	158142c2	bellard	if ( aExp == 0x7FFF ) {
4201	bb98fe42	Andreas Färber	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
4202	158142c2	bellard	return propagateFloatx80NaN( a, b STATUS_VAR );
4203	158142c2	bellard	}
4204	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4205	158142c2	bellard	z.low = floatx80_default_nan_low;
4206	158142c2	bellard	z.high = floatx80_default_nan_high;
4207	158142c2	bellard	return z;
4208	158142c2	bellard	}
4209	158142c2	bellard	if ( aExp == 0 ) {
4210	158142c2	bellard	aExp = 1;
4211	158142c2	bellard	bExp = 1;
4212	158142c2	bellard	}
4213	158142c2	bellard	zSig1 = 0;
4214	158142c2	bellard	if ( bSig < aSig ) goto aBigger;
4215	158142c2	bellard	if ( aSig < bSig ) goto bBigger;
4216	158142c2	bellard	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
4217	158142c2	bellard	bExpBigger:
4218	158142c2	bellard	if ( bExp == 0x7FFF ) {
4219	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4220	158142c2	bellard	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
4221	158142c2	bellard	}
4222	158142c2	bellard	if ( aExp == 0 ) ++expDiff;
4223	158142c2	bellard	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4224	158142c2	bellard	bBigger:
4225	158142c2	bellard	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4226	158142c2	bellard	zExp = bExp;
4227	158142c2	bellard	zSign ^= 1;
4228	158142c2	bellard	goto normalizeRoundAndPack;
4229	158142c2	bellard	aExpBigger:
4230	158142c2	bellard	if ( aExp == 0x7FFF ) {
4231	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4232	158142c2	bellard	return a;
4233	158142c2	bellard	}
4234	158142c2	bellard	if ( bExp == 0 ) --expDiff;
4235	158142c2	bellard	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4236	158142c2	bellard	aBigger:
4237	158142c2	bellard	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4238	158142c2	bellard	zExp = aExp;
4239	158142c2	bellard	normalizeRoundAndPack:
4240	158142c2	bellard	return
4241	158142c2	bellard	normalizeRoundAndPackFloatx80(
4242	158142c2	bellard	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4243	158142c2	bellard
4244	158142c2	bellard	}
4245	158142c2	bellard
4246	158142c2	bellard	/*----------------------------------------------------------------------------
4247	158142c2	bellard	\| Returns the result of adding the extended double-precision floating-point
4248	158142c2	bellard	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
4249	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
4250	158142c2	bellard	----------------------------------------------------------------------------/
4251	158142c2	bellard
4252	158142c2	bellard	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
4253	158142c2	bellard	{
4254	158142c2	bellard	flag aSign, bSign;
4255	158142c2	bellard
4256	158142c2	bellard	aSign = extractFloatx80Sign( a );
4257	158142c2	bellard	bSign = extractFloatx80Sign( b );
4258	158142c2	bellard	if ( aSign == bSign ) {
4259	158142c2	bellard	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4260	158142c2	bellard	}
4261	158142c2	bellard	else {
4262	158142c2	bellard	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4263	158142c2	bellard	}
4264	158142c2	bellard
4265	158142c2	bellard	}
4266	158142c2	bellard
4267	158142c2	bellard	/*----------------------------------------------------------------------------
4268	158142c2	bellard	\| Returns the result of subtracting the extended double-precision floating-
4269	158142c2	bellard	\| point values `a' and `b'. The operation is performed according to the
4270	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271	158142c2	bellard	----------------------------------------------------------------------------/
4272	158142c2	bellard
4273	158142c2	bellard	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
4274	158142c2	bellard	{
4275	158142c2	bellard	flag aSign, bSign;
4276	158142c2	bellard
4277	158142c2	bellard	aSign = extractFloatx80Sign( a );
4278	158142c2	bellard	bSign = extractFloatx80Sign( b );
4279	158142c2	bellard	if ( aSign == bSign ) {
4280	158142c2	bellard	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4281	158142c2	bellard	}
4282	158142c2	bellard	else {
4283	158142c2	bellard	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4284	158142c2	bellard	}
4285	158142c2	bellard
4286	158142c2	bellard	}
4287	158142c2	bellard
4288	158142c2	bellard	/*----------------------------------------------------------------------------
4289	158142c2	bellard	\| Returns the result of multiplying the extended double-precision floating-
4290	158142c2	bellard	\| point values `a' and `b'. The operation is performed according to the
4291	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4292	158142c2	bellard	----------------------------------------------------------------------------/
4293	158142c2	bellard
4294	158142c2	bellard	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
4295	158142c2	bellard	{
4296	158142c2	bellard	flag aSign, bSign, zSign;
4297	158142c2	bellard	int32 aExp, bExp, zExp;
4298	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig0, zSig1;
4299	158142c2	bellard	floatx80 z;
4300	158142c2	bellard
4301	158142c2	bellard	aSig = extractFloatx80Frac( a );
4302	158142c2	bellard	aExp = extractFloatx80Exp( a );
4303	158142c2	bellard	aSign = extractFloatx80Sign( a );
4304	158142c2	bellard	bSig = extractFloatx80Frac( b );
4305	158142c2	bellard	bExp = extractFloatx80Exp( b );
4306	158142c2	bellard	bSign = extractFloatx80Sign( b );
4307	158142c2	bellard	zSign = aSign ^ bSign;
4308	158142c2	bellard	if ( aExp == 0x7FFF ) {
4309	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 )
4310	bb98fe42	Andreas Färber	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4311	158142c2	bellard	return propagateFloatx80NaN( a, b STATUS_VAR );
4312	158142c2	bellard	}
4313	158142c2	bellard	if ( ( bExp \| bSig ) == 0 ) goto invalid;
4314	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4315	158142c2	bellard	}
4316	158142c2	bellard	if ( bExp == 0x7FFF ) {
4317	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4318	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
4319	158142c2	bellard	invalid:
4320	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4321	158142c2	bellard	z.low = floatx80_default_nan_low;
4322	158142c2	bellard	z.high = floatx80_default_nan_high;
4323	158142c2	bellard	return z;
4324	158142c2	bellard	}
4325	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4326	158142c2	bellard	}
4327	158142c2	bellard	if ( aExp == 0 ) {
4328	158142c2	bellard	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4329	158142c2	bellard	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4330	158142c2	bellard	}
4331	158142c2	bellard	if ( bExp == 0 ) {
4332	158142c2	bellard	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4333	158142c2	bellard	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4334	158142c2	bellard	}
4335	158142c2	bellard	zExp = aExp + bExp - 0x3FFE;
4336	158142c2	bellard	mul64To128( aSig, bSig, &zSig0, &zSig1 );
4337	bb98fe42	Andreas Färber	if ( 0 < (int64_t) zSig0 ) {
4338	158142c2	bellard	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4339	158142c2	bellard	--zExp;
4340	158142c2	bellard	}
4341	158142c2	bellard	return
4342	158142c2	bellard	roundAndPackFloatx80(
4343	158142c2	bellard	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4344	158142c2	bellard
4345	158142c2	bellard	}
4346	158142c2	bellard
4347	158142c2	bellard	/*----------------------------------------------------------------------------
4348	158142c2	bellard	\| Returns the result of dividing the extended double-precision floating-point
4349	158142c2	bellard	\| value `a' by the corresponding value `b'. The operation is performed
4350	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4351	158142c2	bellard	----------------------------------------------------------------------------/
4352	158142c2	bellard
4353	158142c2	bellard	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4354	158142c2	bellard	{
4355	158142c2	bellard	flag aSign, bSign, zSign;
4356	158142c2	bellard	int32 aExp, bExp, zExp;
4357	bb98fe42	Andreas Färber	uint64_t aSig, bSig, zSig0, zSig1;
4358	bb98fe42	Andreas Färber	uint64_t rem0, rem1, rem2, term0, term1, term2;
4359	158142c2	bellard	floatx80 z;
4360	158142c2	bellard
4361	158142c2	bellard	aSig = extractFloatx80Frac( a );
4362	158142c2	bellard	aExp = extractFloatx80Exp( a );
4363	158142c2	bellard	aSign = extractFloatx80Sign( a );
4364	158142c2	bellard	bSig = extractFloatx80Frac( b );
4365	158142c2	bellard	bExp = extractFloatx80Exp( b );
4366	158142c2	bellard	bSign = extractFloatx80Sign( b );
4367	158142c2	bellard	zSign = aSign ^ bSign;
4368	158142c2	bellard	if ( aExp == 0x7FFF ) {
4369	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4370	158142c2	bellard	if ( bExp == 0x7FFF ) {
4371	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4372	158142c2	bellard	goto invalid;
4373	158142c2	bellard	}
4374	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4375	158142c2	bellard	}
4376	158142c2	bellard	if ( bExp == 0x7FFF ) {
4377	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4378	158142c2	bellard	return packFloatx80( zSign, 0, 0 );
4379	158142c2	bellard	}
4380	158142c2	bellard	if ( bExp == 0 ) {
4381	158142c2	bellard	if ( bSig == 0 ) {
4382	158142c2	bellard	if ( ( aExp \| aSig ) == 0 ) {
4383	158142c2	bellard	invalid:
4384	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4385	158142c2	bellard	z.low = floatx80_default_nan_low;
4386	158142c2	bellard	z.high = floatx80_default_nan_high;
4387	158142c2	bellard	return z;
4388	158142c2	bellard	}
4389	158142c2	bellard	float_raise( float_flag_divbyzero STATUS_VAR);
4390	158142c2	bellard	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4391	158142c2	bellard	}
4392	158142c2	bellard	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4393	158142c2	bellard	}
4394	158142c2	bellard	if ( aExp == 0 ) {
4395	158142c2	bellard	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4396	158142c2	bellard	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4397	158142c2	bellard	}
4398	158142c2	bellard	zExp = aExp - bExp + 0x3FFE;
4399	158142c2	bellard	rem1 = 0;
4400	158142c2	bellard	if ( bSig <= aSig ) {
4401	158142c2	bellard	shift128Right( aSig, 0, 1, &aSig, &rem1 );
4402	158142c2	bellard	++zExp;
4403	158142c2	bellard	}
4404	158142c2	bellard	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4405	158142c2	bellard	mul64To128( bSig, zSig0, &term0, &term1 );
4406	158142c2	bellard	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4407	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
4408	158142c2	bellard	--zSig0;
4409	158142c2	bellard	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4410	158142c2	bellard	}
4411	158142c2	bellard	zSig1 = estimateDiv128To64( rem1, 0, bSig );
4412	bb98fe42	Andreas Färber	if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4413	158142c2	bellard	mul64To128( bSig, zSig1, &term1, &term2 );
4414	158142c2	bellard	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4415	bb98fe42	Andreas Färber	while ( (int64_t) rem1 < 0 ) {
4416	158142c2	bellard	--zSig1;
4417	158142c2	bellard	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4418	158142c2	bellard	}
4419	158142c2	bellard	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
4420	158142c2	bellard	}
4421	158142c2	bellard	return
4422	158142c2	bellard	roundAndPackFloatx80(
4423	158142c2	bellard	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4424	158142c2	bellard
4425	158142c2	bellard	}
4426	158142c2	bellard
4427	158142c2	bellard	/*----------------------------------------------------------------------------
4428	158142c2	bellard	\| Returns the remainder of the extended double-precision floating-point value
4429	158142c2	bellard	\| `a' with respect to the corresponding value `b'. The operation is performed
4430	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4431	158142c2	bellard	----------------------------------------------------------------------------/
4432	158142c2	bellard
4433	158142c2	bellard	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4434	158142c2	bellard	{
4435	ed086f3d	Blue Swirl	flag aSign, zSign;
4436	158142c2	bellard	int32 aExp, bExp, expDiff;
4437	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig;
4438	bb98fe42	Andreas Färber	uint64_t q, term0, term1, alternateASig0, alternateASig1;
4439	158142c2	bellard	floatx80 z;
4440	158142c2	bellard
4441	158142c2	bellard	aSig0 = extractFloatx80Frac( a );
4442	158142c2	bellard	aExp = extractFloatx80Exp( a );
4443	158142c2	bellard	aSign = extractFloatx80Sign( a );
4444	158142c2	bellard	bSig = extractFloatx80Frac( b );
4445	158142c2	bellard	bExp = extractFloatx80Exp( b );
4446	158142c2	bellard	if ( aExp == 0x7FFF ) {
4447	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig0<<1 )
4448	bb98fe42	Andreas Färber	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4449	158142c2	bellard	return propagateFloatx80NaN( a, b STATUS_VAR );
4450	158142c2	bellard	}
4451	158142c2	bellard	goto invalid;
4452	158142c2	bellard	}
4453	158142c2	bellard	if ( bExp == 0x7FFF ) {
4454	bb98fe42	Andreas Färber	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4455	158142c2	bellard	return a;
4456	158142c2	bellard	}
4457	158142c2	bellard	if ( bExp == 0 ) {
4458	158142c2	bellard	if ( bSig == 0 ) {
4459	158142c2	bellard	invalid:
4460	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4461	158142c2	bellard	z.low = floatx80_default_nan_low;
4462	158142c2	bellard	z.high = floatx80_default_nan_high;
4463	158142c2	bellard	return z;
4464	158142c2	bellard	}
4465	158142c2	bellard	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4466	158142c2	bellard	}
4467	158142c2	bellard	if ( aExp == 0 ) {
4468	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
4469	158142c2	bellard	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4470	158142c2	bellard	}
4471	158142c2	bellard	bSig \|= LIT64( 0x8000000000000000 );
4472	158142c2	bellard	zSign = aSign;
4473	158142c2	bellard	expDiff = aExp - bExp;
4474	158142c2	bellard	aSig1 = 0;
4475	158142c2	bellard	if ( expDiff < 0 ) {
4476	158142c2	bellard	if ( expDiff < -1 ) return a;
4477	158142c2	bellard	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4478	158142c2	bellard	expDiff = 0;
4479	158142c2	bellard	}
4480	158142c2	bellard	q = ( bSig <= aSig0 );
4481	158142c2	bellard	if ( q ) aSig0 -= bSig;
4482	158142c2	bellard	expDiff -= 64;
4483	158142c2	bellard	while ( 0 < expDiff ) {
4484	158142c2	bellard	q = estimateDiv128To64( aSig0, aSig1, bSig );
4485	158142c2	bellard	q = ( 2 < q ) ? q - 2 : 0;
4486	158142c2	bellard	mul64To128( bSig, q, &term0, &term1 );
4487	158142c2	bellard	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4488	158142c2	bellard	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4489	158142c2	bellard	expDiff -= 62;
4490	158142c2	bellard	}
4491	158142c2	bellard	expDiff += 64;
4492	158142c2	bellard	if ( 0 < expDiff ) {
4493	158142c2	bellard	q = estimateDiv128To64( aSig0, aSig1, bSig );
4494	158142c2	bellard	q = ( 2 < q ) ? q - 2 : 0;
4495	158142c2	bellard	q >>= 64 - expDiff;
4496	158142c2	bellard	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4497	158142c2	bellard	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4498	158142c2	bellard	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4499	158142c2	bellard	while ( le128( term0, term1, aSig0, aSig1 ) ) {
4500	158142c2	bellard	++q;
4501	158142c2	bellard	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4502	158142c2	bellard	}
4503	158142c2	bellard	}
4504	158142c2	bellard	else {
4505	158142c2	bellard	term1 = 0;
4506	158142c2	bellard	term0 = bSig;
4507	158142c2	bellard	}
4508	158142c2	bellard	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4509	158142c2	bellard	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4510	158142c2	bellard	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4511	158142c2	bellard	&& ( q & 1 ) )
4512	158142c2	bellard	) {
4513	158142c2	bellard	aSig0 = alternateASig0;
4514	158142c2	bellard	aSig1 = alternateASig1;
4515	158142c2	bellard	zSign = ! zSign;
4516	158142c2	bellard	}
4517	158142c2	bellard	return
4518	158142c2	bellard	normalizeRoundAndPackFloatx80(
4519	158142c2	bellard	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4520	158142c2	bellard
4521	158142c2	bellard	}
4522	158142c2	bellard
4523	158142c2	bellard	/*----------------------------------------------------------------------------
4524	158142c2	bellard	| Returns the square root of the extended double-precision floating-point
4525	158142c2	bellard	| value `a'. The operation is performed according to the IEC/IEEE Standard
4526	158142c2	bellard	| for Binary Floating-Point Arithmetic.
4527	158142c2	bellard	*----------------------------------------------------------------------------*/
4528	158142c2	bellard
/* Square root of the floatx80 value `a'.  Special operands are resolved */
/* first; the significand is then built from an initial estimate that is */
/* corrected against an exactly tracked remainder, and the result is rounded */
/* with sign 0 by roundAndPackFloatx80(). */
4529	158142c2	bellard	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4530	158142c2	bellard	{
4531	158142c2	bellard	flag aSign;
4532	158142c2	bellard	int32 aExp, zExp;
4533	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4534	bb98fe42	Andreas Färber	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4535	158142c2	bellard	floatx80 z;
4536	158142c2	bellard	
4537	158142c2	bellard	aSig0 = extractFloatx80Frac( a );
4538	158142c2	bellard	aExp = extractFloatx80Exp( a );
4539	158142c2	bellard	aSign = extractFloatx80Sign( a );
/* Maximum exponent: any significand bit below the top one means NaN, which */
/* is propagated; otherwise a clear sign returns `a' unchanged and a set */
/* sign is invalid. */
4540	158142c2	bellard	if ( aExp == 0x7FFF ) {
4541	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4542	158142c2	bellard	if ( ! aSign ) return a;
4543	158142c2	bellard	goto invalid;
4544	158142c2	bellard	}
/* Sign set: an all-zero exponent and significand (negative zero) is */
/* returned as is; every other negative input raises the invalid flag and */
/* yields the default NaN. */
4545	158142c2	bellard	if ( aSign ) {
4546	158142c2	bellard	if ( ( aExp \| aSig0 ) == 0 ) return a;
4547	158142c2	bellard	invalid:
4548	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4549	158142c2	bellard	z.low = floatx80_default_nan_low;
4550	158142c2	bellard	z.high = floatx80_default_nan_high;
4551	158142c2	bellard	return z;
4552	158142c2	bellard	}
/* Zero exponent: a zero significand returns +0; otherwise the subnormal */
/* is normalized, which updates both aExp and aSig0. */
4553	158142c2	bellard	if ( aExp == 0 ) {
4554	158142c2	bellard	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4555	158142c2	bellard	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4556	158142c2	bellard	}
/* Halve the unbiased exponent and re-apply the 0x3FFF bias. */
4557	158142c2	bellard	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/* Initial root estimate.  NOTE(review): the accuracy of estimateSqrt32 and */
/* estimateDiv128To64 is defined elsewhere; the correction loops below */
/* presumably rely on the estimate never being too small - confirm. */
4558	158142c2	bellard	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
/* The shift amount is 2, plus 1 more when aExp is odd. */
4559	158142c2	bellard	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4560	158142c2	bellard	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4561	158142c2	bellard	doubleZSig0 = zSig0<<1;
/* rem0:rem1 = shifted significand - zSig0^2.  While that remainder is */
/* negative, step zSig0 down by one and add the matching correction term */
/* back into the remainder. */
4562	158142c2	bellard	mul64To128( zSig0, zSig0, &term0, &term1 );
4563	158142c2	bellard	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4564	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
4565	158142c2	bellard	--zSig0;
4566	158142c2	bellard	doubleZSig0 -= 2;
4567	158142c2	bellard	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
4568	158142c2	bellard	}
/* Low word of the root.  The exact 192-bit remainder is only computed when */
/* the low 62 bits of the estimate are <= 5; a zero estimate is first */
/* replaced by 1. */
4569	158142c2	bellard	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4570	158142c2	bellard	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4571	158142c2	bellard	if ( zSig1 == 0 ) zSig1 = 1;
4572	158142c2	bellard	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4573	158142c2	bellard	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4574	158142c2	bellard	mul64To128( zSig1, zSig1, &term2, &term3 );
4575	158142c2	bellard	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
/* Same downward correction as above, now on the low word. */
4576	bb98fe42	Andreas Färber	while ( (int64_t) rem1 < 0 ) {
4577	158142c2	bellard	--zSig1;
4578	158142c2	bellard	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4579	158142c2	bellard	term3 \|= 1;
4580	158142c2	bellard	term2 \|= doubleZSig0;
4581	158142c2	bellard	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4582	158142c2	bellard	}
/* Set the low bit of zSig1 if any remainder word is nonzero. */
4583	158142c2	bellard	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
4584	158142c2	bellard	}
/* Shift zSig1 left by one across the zSig0:zSig1 pair and merge */
/* doubleZSig0 into the high word to form the final 128-bit significand. */
4585	158142c2	bellard	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4586	158142c2	bellard	zSig0 \|= doubleZSig0;
4587	158142c2	bellard	return
4588	158142c2	bellard	roundAndPackFloatx80(
4589	158142c2	bellard	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4590	158142c2	bellard	
4591	158142c2	bellard	}
4592	158142c2	bellard
4593	158142c2	bellard	/*----------------------------------------------------------------------------
4594	b689362d	Aurelien Jarno	\| Returns 1 if the extended double-precision floating-point value `a' is equal
4595	b689362d	Aurelien Jarno	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
4596	b689362d	Aurelien Jarno	\| raised if either operand is a NaN. Otherwise, the comparison is performed
4597	b689362d	Aurelien Jarno	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4598	158142c2	bellard	----------------------------------------------------------------------------/
4599	158142c2	bellard
4600	b689362d	Aurelien Jarno	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4601	158142c2	bellard	{
4602	158142c2	bellard
4603	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4604	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4605	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4606	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4607	158142c2	bellard	) {
4608	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
4609	158142c2	bellard	return 0;
4610	158142c2	bellard	}
4611	158142c2	bellard	return
4612	158142c2	bellard	( a.low == b.low )
4613	158142c2	bellard	&& ( ( a.high == b.high )
4614	158142c2	bellard	\|\| ( ( a.low == 0 )
4615	bb98fe42	Andreas Färber	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
4616	158142c2	bellard	);
4617	158142c2	bellard
4618	158142c2	bellard	}
4619	158142c2	bellard
4620	158142c2	bellard	/*----------------------------------------------------------------------------
4621	158142c2	bellard	\| Returns 1 if the extended double-precision floating-point value `a' is
4622	158142c2	bellard	\| less than or equal to the corresponding value `b', and 0 otherwise. The
4623	f5a64251	Aurelien Jarno	\| invalid exception is raised if either operand is a NaN. The comparison is
4624	f5a64251	Aurelien Jarno	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
4625	f5a64251	Aurelien Jarno	\| Arithmetic.
4626	158142c2	bellard	----------------------------------------------------------------------------/
4627	158142c2	bellard
4628	750afe93	bellard	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4629	158142c2	bellard	{
4630	158142c2	bellard	flag aSign, bSign;
4631	158142c2	bellard
4632	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4633	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4634	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4635	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4636	158142c2	bellard	) {
4637	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4638	158142c2	bellard	return 0;
4639	158142c2	bellard	}
4640	158142c2	bellard	aSign = extractFloatx80Sign( a );
4641	158142c2	bellard	bSign = extractFloatx80Sign( b );
4642	158142c2	bellard	if ( aSign != bSign ) {
4643	158142c2	bellard	return
4644	158142c2	bellard	aSign
4645	bb98fe42	Andreas Färber	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
4646	158142c2	bellard	== 0 );
4647	158142c2	bellard	}
4648	158142c2	bellard	return
4649	158142c2	bellard	aSign ? le128( b.high, b.low, a.high, a.low )
4650	158142c2	bellard	: le128( a.high, a.low, b.high, b.low );
4651	158142c2	bellard
4652	158142c2	bellard	}
4653	158142c2	bellard
4654	158142c2	bellard	/*----------------------------------------------------------------------------
4655	158142c2	bellard	\| Returns 1 if the extended double-precision floating-point value `a' is
4656	f5a64251	Aurelien Jarno	\| less than the corresponding value `b', and 0 otherwise. The invalid
4657	f5a64251	Aurelien Jarno	\| exception is raised if either operand is a NaN. The comparison is performed
4658	f5a64251	Aurelien Jarno	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4659	158142c2	bellard	----------------------------------------------------------------------------/
4660	158142c2	bellard
4661	750afe93	bellard	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4662	158142c2	bellard	{
4663	158142c2	bellard	flag aSign, bSign;
4664	158142c2	bellard
4665	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4666	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4667	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4668	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4669	158142c2	bellard	) {
4670	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4671	158142c2	bellard	return 0;
4672	158142c2	bellard	}
4673	158142c2	bellard	aSign = extractFloatx80Sign( a );
4674	158142c2	bellard	bSign = extractFloatx80Sign( b );
4675	158142c2	bellard	if ( aSign != bSign ) {
4676	158142c2	bellard	return
4677	158142c2	bellard	aSign
4678	bb98fe42	Andreas Färber	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
4679	158142c2	bellard	!= 0 );
4680	158142c2	bellard	}
4681	158142c2	bellard	return
4682	158142c2	bellard	aSign ? lt128( b.high, b.low, a.high, a.low )
4683	158142c2	bellard	: lt128( a.high, a.low, b.high, b.low );
4684	158142c2	bellard
4685	158142c2	bellard	}
4686	158142c2	bellard
4687	158142c2	bellard	/*----------------------------------------------------------------------------
4688	67b7861d	Aurelien Jarno	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
4689	f5a64251	Aurelien Jarno	\| cannot be compared, and 0 otherwise. The invalid exception is raised if
4690	f5a64251	Aurelien Jarno	\| either operand is a NaN. The comparison is performed according to the
4691	f5a64251	Aurelien Jarno	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4692	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
4693	67b7861d	Aurelien Jarno	int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
4694	67b7861d	Aurelien Jarno	{
4695	67b7861d	Aurelien Jarno	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4696	67b7861d	Aurelien Jarno	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4697	67b7861d	Aurelien Jarno	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4698	67b7861d	Aurelien Jarno	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4699	67b7861d	Aurelien Jarno	) {
4700	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
4701	67b7861d	Aurelien Jarno	return 1;
4702	67b7861d	Aurelien Jarno	}
4703	67b7861d	Aurelien Jarno	return 0;
4704	67b7861d	Aurelien Jarno	}
4705	67b7861d	Aurelien Jarno
4706	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
4707	b689362d	Aurelien Jarno	\| Returns 1 if the extended double-precision floating-point value `a' is
4708	f5a64251	Aurelien Jarno	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4709	f5a64251	Aurelien Jarno	\| cause an exception. The comparison is performed according to the IEC/IEEE
4710	f5a64251	Aurelien Jarno	\| Standard for Binary Floating-Point Arithmetic.
4711	158142c2	bellard	----------------------------------------------------------------------------/
4712	158142c2	bellard
4713	b689362d	Aurelien Jarno	int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4714	158142c2	bellard	{
4715	158142c2	bellard
4716	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4717	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4718	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4719	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4720	158142c2	bellard	) {
4721	b689362d	Aurelien Jarno	if ( floatx80_is_signaling_nan( a )
4722	b689362d	Aurelien Jarno	\|\| floatx80_is_signaling_nan( b ) ) {
4723	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
4724	b689362d	Aurelien Jarno	}
4725	158142c2	bellard	return 0;
4726	158142c2	bellard	}
4727	158142c2	bellard	return
4728	158142c2	bellard	( a.low == b.low )
4729	158142c2	bellard	&& ( ( a.high == b.high )
4730	158142c2	bellard	\|\| ( ( a.low == 0 )
4731	bb98fe42	Andreas Färber	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
4732	158142c2	bellard	);
4733	158142c2	bellard
4734	158142c2	bellard	}
4735	158142c2	bellard
4736	158142c2	bellard	/*----------------------------------------------------------------------------
4737	158142c2	bellard	\| Returns 1 if the extended double-precision floating-point value `a' is less
4738	158142c2	bellard	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
4739	158142c2	bellard	\| do not cause an exception. Otherwise, the comparison is performed according
4740	158142c2	bellard	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4741	158142c2	bellard	----------------------------------------------------------------------------/
4742	158142c2	bellard
4743	750afe93	bellard	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4744	158142c2	bellard	{
4745	158142c2	bellard	flag aSign, bSign;
4746	158142c2	bellard
4747	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4748	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4749	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4750	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4751	158142c2	bellard	) {
4752	158142c2	bellard	if ( floatx80_is_signaling_nan( a )
4753	158142c2	bellard	\|\| floatx80_is_signaling_nan( b ) ) {
4754	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4755	158142c2	bellard	}
4756	158142c2	bellard	return 0;
4757	158142c2	bellard	}
4758	158142c2	bellard	aSign = extractFloatx80Sign( a );
4759	158142c2	bellard	bSign = extractFloatx80Sign( b );
4760	158142c2	bellard	if ( aSign != bSign ) {
4761	158142c2	bellard	return
4762	158142c2	bellard	aSign
4763	bb98fe42	Andreas Färber	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
4764	158142c2	bellard	== 0 );
4765	158142c2	bellard	}
4766	158142c2	bellard	return
4767	158142c2	bellard	aSign ? le128( b.high, b.low, a.high, a.low )
4768	158142c2	bellard	: le128( a.high, a.low, b.high, b.low );
4769	158142c2	bellard
4770	158142c2	bellard	}
4771	158142c2	bellard
4772	158142c2	bellard	/*----------------------------------------------------------------------------
4773	158142c2	bellard	\| Returns 1 if the extended double-precision floating-point value `a' is less
4774	158142c2	bellard	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
4775	158142c2	bellard	\| an exception. Otherwise, the comparison is performed according to the
4776	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4777	158142c2	bellard	----------------------------------------------------------------------------/
4778	158142c2	bellard
4779	750afe93	bellard	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4780	158142c2	bellard	{
4781	158142c2	bellard	flag aSign, bSign;
4782	158142c2	bellard
4783	158142c2	bellard	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4784	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4785	158142c2	bellard	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4786	bb98fe42	Andreas Färber	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4787	158142c2	bellard	) {
4788	158142c2	bellard	if ( floatx80_is_signaling_nan( a )
4789	158142c2	bellard	\|\| floatx80_is_signaling_nan( b ) ) {
4790	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4791	158142c2	bellard	}
4792	158142c2	bellard	return 0;
4793	158142c2	bellard	}
4794	158142c2	bellard	aSign = extractFloatx80Sign( a );
4795	158142c2	bellard	bSign = extractFloatx80Sign( b );
4796	158142c2	bellard	if ( aSign != bSign ) {
4797	158142c2	bellard	return
4798	158142c2	bellard	aSign
4799	bb98fe42	Andreas Färber	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
4800	158142c2	bellard	!= 0 );
4801	158142c2	bellard	}
4802	158142c2	bellard	return
4803	158142c2	bellard	aSign ? lt128( b.high, b.low, a.high, a.low )
4804	158142c2	bellard	: lt128( a.high, a.low, b.high, b.low );
4805	158142c2	bellard
4806	158142c2	bellard	}
4807	158142c2	bellard
4808	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
4809	67b7861d	Aurelien Jarno	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
4810	67b7861d	Aurelien Jarno	\| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
4811	67b7861d	Aurelien Jarno	\| The comparison is performed according to the IEC/IEEE Standard for Binary
4812	67b7861d	Aurelien Jarno	\| Floating-Point Arithmetic.
4813	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
4814	67b7861d	Aurelien Jarno	int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4815	67b7861d	Aurelien Jarno	{
4816	67b7861d	Aurelien Jarno	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4817	67b7861d	Aurelien Jarno	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4818	67b7861d	Aurelien Jarno	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
4819	67b7861d	Aurelien Jarno	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4820	67b7861d	Aurelien Jarno	) {
4821	67b7861d	Aurelien Jarno	if ( floatx80_is_signaling_nan( a )
4822	67b7861d	Aurelien Jarno	\|\| floatx80_is_signaling_nan( b ) ) {
4823	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
4824	67b7861d	Aurelien Jarno	}
4825	67b7861d	Aurelien Jarno	return 1;
4826	67b7861d	Aurelien Jarno	}
4827	67b7861d	Aurelien Jarno	return 0;
4828	67b7861d	Aurelien Jarno	}
4829	67b7861d	Aurelien Jarno
4830	158142c2	bellard	#endif
4831	158142c2	bellard
4832	158142c2	bellard	#ifdef FLOAT128
4833	158142c2	bellard
4834	158142c2	bellard	/*----------------------------------------------------------------------------
4835	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
4836	158142c2	bellard	\| value `a' to the 32-bit two's complement integer format. The conversion
4837	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4838	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
4839	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
4840	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
4841	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
4842	158142c2	bellard	----------------------------------------------------------------------------/
4843	158142c2	bellard
4844	158142c2	bellard	int32 float128_to_int32( float128 a STATUS_PARAM )
4845	158142c2	bellard	{
4846	158142c2	bellard	flag aSign;
4847	158142c2	bellard	int32 aExp, shiftCount;
4848	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
4849	158142c2	bellard
4850	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
4851	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
4852	158142c2	bellard	aExp = extractFloat128Exp( a );
4853	158142c2	bellard	aSign = extractFloat128Sign( a );
4854	158142c2	bellard	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
4855	158142c2	bellard	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
4856	158142c2	bellard	aSig0 \|= ( aSig1 != 0 );
4857	158142c2	bellard	shiftCount = 0x4028 - aExp;
4858	158142c2	bellard	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4859	158142c2	bellard	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4860	158142c2	bellard
4861	158142c2	bellard	}
4862	158142c2	bellard
4863	158142c2	bellard	/*----------------------------------------------------------------------------
4864	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
4865	158142c2	bellard	\| value `a' to the 32-bit two's complement integer format. The conversion
4866	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4867	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero. If
4868	158142c2	bellard	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
4869	158142c2	bellard	\| conversion overflows, the largest integer with the same sign as `a' is
4870	158142c2	bellard	\| returned.
4871	158142c2	bellard	----------------------------------------------------------------------------/
4872	158142c2	bellard
4873	158142c2	bellard	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4874	158142c2	bellard	{
4875	158142c2	bellard	flag aSign;
4876	158142c2	bellard	int32 aExp, shiftCount;
4877	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, savedASig;
4878	158142c2	bellard	int32 z;
4879	158142c2	bellard
4880	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
4881	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
4882	158142c2	bellard	aExp = extractFloat128Exp( a );
4883	158142c2	bellard	aSign = extractFloat128Sign( a );
4884	158142c2	bellard	aSig0 \|= ( aSig1 != 0 );
4885	158142c2	bellard	if ( 0x401E < aExp ) {
4886	158142c2	bellard	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4887	158142c2	bellard	goto invalid;
4888	158142c2	bellard	}
4889	158142c2	bellard	else if ( aExp < 0x3FFF ) {
4890	158142c2	bellard	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
4891	158142c2	bellard	return 0;
4892	158142c2	bellard	}
4893	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
4894	158142c2	bellard	shiftCount = 0x402F - aExp;
4895	158142c2	bellard	savedASig = aSig0;
4896	158142c2	bellard	aSig0 >>= shiftCount;
4897	158142c2	bellard	z = aSig0;
4898	158142c2	bellard	if ( aSign ) z = - z;
4899	158142c2	bellard	if ( ( z < 0 ) ^ aSign ) {
4900	158142c2	bellard	invalid:
4901	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4902	bb98fe42	Andreas Färber	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4903	158142c2	bellard	}
4904	158142c2	bellard	if ( ( aSig0<<shiftCount ) != savedASig ) {
4905	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
4906	158142c2	bellard	}
4907	158142c2	bellard	return z;
4908	158142c2	bellard
4909	158142c2	bellard	}
4910	158142c2	bellard
4911	158142c2	bellard	/*----------------------------------------------------------------------------
4912	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
4913	158142c2	bellard	\| value `a' to the 64-bit two's complement integer format. The conversion
4914	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4915	158142c2	bellard	\| Arithmetic---which means in particular that the conversion is rounded
4916	158142c2	bellard	\| according to the current rounding mode. If `a' is a NaN, the largest
4917	158142c2	bellard	\| positive integer is returned. Otherwise, if the conversion overflows, the
4918	158142c2	bellard	\| largest integer with the same sign as `a' is returned.
4919	158142c2	bellard	----------------------------------------------------------------------------/
4920	158142c2	bellard
4921	158142c2	bellard	int64 float128_to_int64( float128 a STATUS_PARAM )
4922	158142c2	bellard	{
4923	158142c2	bellard	flag aSign;
4924	158142c2	bellard	int32 aExp, shiftCount;
4925	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
4926	158142c2	bellard
4927	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
4928	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
4929	158142c2	bellard	aExp = extractFloat128Exp( a );
4930	158142c2	bellard	aSign = extractFloat128Sign( a );
4931	158142c2	bellard	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
4932	158142c2	bellard	shiftCount = 0x402F - aExp;
4933	158142c2	bellard	if ( shiftCount <= 0 ) {
4934	158142c2	bellard	if ( 0x403E < aExp ) {
4935	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4936	158142c2	bellard	if ( ! aSign
4937	158142c2	bellard	\|\| ( ( aExp == 0x7FFF )
4938	158142c2	bellard	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4939	158142c2	bellard	)
4940	158142c2	bellard	) {
4941	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
4942	158142c2	bellard	}
4943	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
4944	158142c2	bellard	}
4945	158142c2	bellard	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4946	158142c2	bellard	}
4947	158142c2	bellard	else {
4948	158142c2	bellard	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4949	158142c2	bellard	}
4950	158142c2	bellard	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4951	158142c2	bellard
4952	158142c2	bellard	}
4953	158142c2	bellard
4954	158142c2	bellard	/*----------------------------------------------------------------------------
4955	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
4956	158142c2	bellard	\| value `a' to the 64-bit two's complement integer format. The conversion
4957	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4958	158142c2	bellard	\| Arithmetic, except that the conversion is always rounded toward zero.
4959	158142c2	bellard	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
4960	158142c2	bellard	\| the conversion overflows, the largest integer with the same sign as `a' is
4961	158142c2	bellard	\| returned.
4962	158142c2	bellard	----------------------------------------------------------------------------/
4963	158142c2	bellard
4964	158142c2	bellard	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4965	158142c2	bellard	{
4966	158142c2	bellard	flag aSign;
4967	158142c2	bellard	int32 aExp, shiftCount;
4968	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
4969	158142c2	bellard	int64 z;
4970	158142c2	bellard
4971	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
4972	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
4973	158142c2	bellard	aExp = extractFloat128Exp( a );
4974	158142c2	bellard	aSign = extractFloat128Sign( a );
4975	158142c2	bellard	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
4976	158142c2	bellard	shiftCount = aExp - 0x402F;
4977	158142c2	bellard	if ( 0 < shiftCount ) {
4978	158142c2	bellard	if ( 0x403E <= aExp ) {
4979	158142c2	bellard	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4980	158142c2	bellard	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4981	158142c2	bellard	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4982	158142c2	bellard	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
4983	158142c2	bellard	}
4984	158142c2	bellard	else {
4985	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
4986	158142c2	bellard	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
4987	158142c2	bellard	return LIT64( 0x7FFFFFFFFFFFFFFF );
4988	158142c2	bellard	}
4989	158142c2	bellard	}
4990	bb98fe42	Andreas Färber	return (int64_t) LIT64( 0x8000000000000000 );
4991	158142c2	bellard	}
4992	158142c2	bellard	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
4993	bb98fe42	Andreas Färber	if ( (uint64_t) ( aSig1<<shiftCount ) ) {
4994	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
4995	158142c2	bellard	}
4996	158142c2	bellard	}
4997	158142c2	bellard	else {
4998	158142c2	bellard	if ( aExp < 0x3FFF ) {
4999	158142c2	bellard	if ( aExp \| aSig0 \| aSig1 ) {
5000	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
5001	158142c2	bellard	}
5002	158142c2	bellard	return 0;
5003	158142c2	bellard	}
5004	158142c2	bellard	z = aSig0>>( - shiftCount );
5005	158142c2	bellard	if ( aSig1
5006	bb98fe42	Andreas Färber	\|\| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5007	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
5008	158142c2	bellard	}
5009	158142c2	bellard	}
5010	158142c2	bellard	if ( aSign ) z = - z;
5011	158142c2	bellard	return z;
5012	158142c2	bellard
5013	158142c2	bellard	}
5014	158142c2	bellard
5015	158142c2	bellard	/*----------------------------------------------------------------------------
5016	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
5017	158142c2	bellard	\| value `a' to the single-precision floating-point format. The conversion
5018	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5019	158142c2	bellard	\| Arithmetic.
5020	158142c2	bellard	----------------------------------------------------------------------------/
5021	158142c2	bellard
5022	158142c2	bellard	float32 float128_to_float32( float128 a STATUS_PARAM )
5023	158142c2	bellard	{
5024	158142c2	bellard	flag aSign;
5025	158142c2	bellard	int32 aExp;
5026	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
5027	bb98fe42	Andreas Färber	uint32_t zSig;
5028	158142c2	bellard
5029	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5030	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5031	158142c2	bellard	aExp = extractFloat128Exp( a );
5032	158142c2	bellard	aSign = extractFloat128Sign( a );
5033	158142c2	bellard	if ( aExp == 0x7FFF ) {
5034	158142c2	bellard	if ( aSig0 \| aSig1 ) {
5035	bcd4d9af	Christophe Lyon	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5036	158142c2	bellard	}
5037	158142c2	bellard	return packFloat32( aSign, 0xFF, 0 );
5038	158142c2	bellard	}
5039	158142c2	bellard	aSig0 \|= ( aSig1 != 0 );
5040	158142c2	bellard	shift64RightJamming( aSig0, 18, &aSig0 );
5041	158142c2	bellard	zSig = aSig0;
5042	158142c2	bellard	if ( aExp \|\| zSig ) {
5043	158142c2	bellard	zSig \|= 0x40000000;
5044	158142c2	bellard	aExp -= 0x3F81;
5045	158142c2	bellard	}
5046	158142c2	bellard	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5047	158142c2	bellard
5048	158142c2	bellard	}
5049	158142c2	bellard
5050	158142c2	bellard	/*----------------------------------------------------------------------------
5051	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
5052	158142c2	bellard	\| value `a' to the double-precision floating-point format. The conversion
5053	158142c2	bellard	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5054	158142c2	bellard	\| Arithmetic.
5055	158142c2	bellard	----------------------------------------------------------------------------/
5056	158142c2	bellard
5057	158142c2	bellard	float64 float128_to_float64( float128 a STATUS_PARAM )
5058	158142c2	bellard	{
5059	158142c2	bellard	flag aSign;
5060	158142c2	bellard	int32 aExp;
5061	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
5062	158142c2	bellard
5063	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5064	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5065	158142c2	bellard	aExp = extractFloat128Exp( a );
5066	158142c2	bellard	aSign = extractFloat128Sign( a );
5067	158142c2	bellard	if ( aExp == 0x7FFF ) {
5068	158142c2	bellard	if ( aSig0 \| aSig1 ) {
5069	bcd4d9af	Christophe Lyon	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5070	158142c2	bellard	}
5071	158142c2	bellard	return packFloat64( aSign, 0x7FF, 0 );
5072	158142c2	bellard	}
5073	158142c2	bellard	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5074	158142c2	bellard	aSig0 \|= ( aSig1 != 0 );
5075	158142c2	bellard	if ( aExp \|\| aSig0 ) {
5076	158142c2	bellard	aSig0 \|= LIT64( 0x4000000000000000 );
5077	158142c2	bellard	aExp -= 0x3C01;
5078	158142c2	bellard	}
5079	158142c2	bellard	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5080	158142c2	bellard
5081	158142c2	bellard	}
5082	158142c2	bellard
5083	158142c2	bellard	#ifdef FLOATX80
5084	158142c2	bellard
5085	158142c2	bellard	/*----------------------------------------------------------------------------
5086	158142c2	bellard	\| Returns the result of converting the quadruple-precision floating-point
5087	158142c2	bellard	\| value `a' to the extended double-precision floating-point format. The
5088	158142c2	bellard	\| conversion is performed according to the IEC/IEEE Standard for Binary
5089	158142c2	bellard	\| Floating-Point Arithmetic.
5090	158142c2	bellard	----------------------------------------------------------------------------/
5091	158142c2	bellard
5092	158142c2	bellard	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5093	158142c2	bellard	{
5094	158142c2	bellard	flag aSign;
5095	158142c2	bellard	int32 aExp;
5096	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
5097	158142c2	bellard
5098	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5099	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5100	158142c2	bellard	aExp = extractFloat128Exp( a );
5101	158142c2	bellard	aSign = extractFloat128Sign( a );
5102	158142c2	bellard	if ( aExp == 0x7FFF ) {
5103	158142c2	bellard	if ( aSig0 \| aSig1 ) {
5104	bcd4d9af	Christophe Lyon	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5105	158142c2	bellard	}
5106	158142c2	bellard	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5107	158142c2	bellard	}
5108	158142c2	bellard	if ( aExp == 0 ) {
5109	158142c2	bellard	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5110	158142c2	bellard	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5111	158142c2	bellard	}
5112	158142c2	bellard	else {
5113	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
5114	158142c2	bellard	}
5115	158142c2	bellard	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5116	158142c2	bellard	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5117	158142c2	bellard
5118	158142c2	bellard	}
5119	158142c2	bellard
5120	158142c2	bellard	#endif
5121	158142c2	bellard
5122	158142c2	bellard	/*----------------------------------------------------------------------------
5123	158142c2	bellard	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
5124	158142c2	bellard	\| returns the result as a quadruple-precision floating-point value. The
5125	158142c2	bellard	\| operation is performed according to the IEC/IEEE Standard for Binary
5126	158142c2	bellard	\| Floating-Point Arithmetic.
5127	158142c2	bellard	----------------------------------------------------------------------------/
5128	158142c2	bellard
5129	158142c2	bellard	float128 float128_round_to_int( float128 a STATUS_PARAM )
5130	158142c2	bellard	{
5131	158142c2	bellard	flag aSign;
5132	158142c2	bellard	int32 aExp;
5133	bb98fe42	Andreas Färber	uint64_t lastBitMask, roundBitsMask;
5134	158142c2	bellard	int8 roundingMode;
5135	158142c2	bellard	float128 z;
5136	158142c2	bellard
5137	158142c2	bellard	aExp = extractFloat128Exp( a );
5138	158142c2	bellard	if ( 0x402F <= aExp ) {
5139	158142c2	bellard	if ( 0x406F <= aExp ) {
5140	158142c2	bellard	if ( ( aExp == 0x7FFF )
5141	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
5142	158142c2	bellard	) {
5143	158142c2	bellard	return propagateFloat128NaN( a, a STATUS_VAR );
5144	158142c2	bellard	}
5145	158142c2	bellard	return a;
5146	158142c2	bellard	}
5147	158142c2	bellard	lastBitMask = 1;
5148	158142c2	bellard	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5149	158142c2	bellard	roundBitsMask = lastBitMask - 1;
5150	158142c2	bellard	z = a;
5151	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
5152	158142c2	bellard	if ( roundingMode == float_round_nearest_even ) {
5153	158142c2	bellard	if ( lastBitMask ) {
5154	158142c2	bellard	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5155	158142c2	bellard	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5156	158142c2	bellard	}
5157	158142c2	bellard	else {
5158	bb98fe42	Andreas Färber	if ( (int64_t) z.low < 0 ) {
5159	158142c2	bellard	++z.high;
5160	bb98fe42	Andreas Färber	if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5161	158142c2	bellard	}
5162	158142c2	bellard	}
5163	158142c2	bellard	}
5164	158142c2	bellard	else if ( roundingMode != float_round_to_zero ) {
5165	158142c2	bellard	if ( extractFloat128Sign( z )
5166	158142c2	bellard	^ ( roundingMode == float_round_up ) ) {
5167	158142c2	bellard	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5168	158142c2	bellard	}
5169	158142c2	bellard	}
5170	158142c2	bellard	z.low &= ~ roundBitsMask;
5171	158142c2	bellard	}
5172	158142c2	bellard	else {
5173	158142c2	bellard	if ( aExp < 0x3FFF ) {
5174	bb98fe42	Andreas Färber	if ( ( ( (uint64_t) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
5175	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
5176	158142c2	bellard	aSign = extractFloat128Sign( a );
5177	158142c2	bellard	switch ( STATUS(float_rounding_mode) ) {
5178	158142c2	bellard	case float_round_nearest_even:
5179	158142c2	bellard	if ( ( aExp == 0x3FFE )
5180	158142c2	bellard	&& ( extractFloat128Frac0( a )
5181	158142c2	bellard	\| extractFloat128Frac1( a ) )
5182	158142c2	bellard	) {
5183	158142c2	bellard	return packFloat128( aSign, 0x3FFF, 0, 0 );
5184	158142c2	bellard	}
5185	158142c2	bellard	break;
5186	158142c2	bellard	case float_round_down:
5187	158142c2	bellard	return
5188	158142c2	bellard	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5189	158142c2	bellard	: packFloat128( 0, 0, 0, 0 );
5190	158142c2	bellard	case float_round_up:
5191	158142c2	bellard	return
5192	158142c2	bellard	aSign ? packFloat128( 1, 0, 0, 0 )
5193	158142c2	bellard	: packFloat128( 0, 0x3FFF, 0, 0 );
5194	158142c2	bellard	}
5195	158142c2	bellard	return packFloat128( aSign, 0, 0, 0 );
5196	158142c2	bellard	}
5197	158142c2	bellard	lastBitMask = 1;
5198	158142c2	bellard	lastBitMask <<= 0x402F - aExp;
5199	158142c2	bellard	roundBitsMask = lastBitMask - 1;
5200	158142c2	bellard	z.low = 0;
5201	158142c2	bellard	z.high = a.high;
5202	158142c2	bellard	roundingMode = STATUS(float_rounding_mode);
5203	158142c2	bellard	if ( roundingMode == float_round_nearest_even ) {
5204	158142c2	bellard	z.high += lastBitMask>>1;
5205	158142c2	bellard	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
5206	158142c2	bellard	z.high &= ~ lastBitMask;
5207	158142c2	bellard	}
5208	158142c2	bellard	}
5209	158142c2	bellard	else if ( roundingMode != float_round_to_zero ) {
5210	158142c2	bellard	if ( extractFloat128Sign( z )
5211	158142c2	bellard	^ ( roundingMode == float_round_up ) ) {
5212	158142c2	bellard	z.high \|= ( a.low != 0 );
5213	158142c2	bellard	z.high += roundBitsMask;
5214	158142c2	bellard	}
5215	158142c2	bellard	}
5216	158142c2	bellard	z.high &= ~ roundBitsMask;
5217	158142c2	bellard	}
5218	158142c2	bellard	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
5219	158142c2	bellard	STATUS(float_exception_flags) \|= float_flag_inexact;
5220	158142c2	bellard	}
5221	158142c2	bellard	return z;
5222	158142c2	bellard
5223	158142c2	bellard	}
5224	158142c2	bellard
5225	158142c2	bellard	/*----------------------------------------------------------------------------
5226	158142c2	bellard	\| Returns the result of adding the absolute values of the quadruple-precision
5227	158142c2	bellard	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5228	158142c2	bellard	\| before being returned. `zSign' is ignored if the result is a NaN.
5229	158142c2	bellard	\| The addition is performed according to the IEC/IEEE Standard for Binary
5230	158142c2	bellard	\| Floating-Point Arithmetic.
5231	158142c2	bellard	----------------------------------------------------------------------------/
5232	158142c2	bellard
5233	158142c2	bellard	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5234	158142c2	bellard	{
5235	158142c2	bellard	int32 aExp, bExp, zExp;
5236	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5237	158142c2	bellard	int32 expDiff;
5238	158142c2	bellard
5239	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5240	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5241	158142c2	bellard	aExp = extractFloat128Exp( a );
5242	158142c2	bellard	bSig1 = extractFloat128Frac1( b );
5243	158142c2	bellard	bSig0 = extractFloat128Frac0( b );
5244	158142c2	bellard	bExp = extractFloat128Exp( b );
5245	158142c2	bellard	expDiff = aExp - bExp;
5246	158142c2	bellard	if ( 0 < expDiff ) {
5247	158142c2	bellard	if ( aExp == 0x7FFF ) {
5248	158142c2	bellard	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5249	158142c2	bellard	return a;
5250	158142c2	bellard	}
5251	158142c2	bellard	if ( bExp == 0 ) {
5252	158142c2	bellard	--expDiff;
5253	158142c2	bellard	}
5254	158142c2	bellard	else {
5255	158142c2	bellard	bSig0 \|= LIT64( 0x0001000000000000 );
5256	158142c2	bellard	}
5257	158142c2	bellard	shift128ExtraRightJamming(
5258	158142c2	bellard	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5259	158142c2	bellard	zExp = aExp;
5260	158142c2	bellard	}
5261	158142c2	bellard	else if ( expDiff < 0 ) {
5262	158142c2	bellard	if ( bExp == 0x7FFF ) {
5263	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5264	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
5265	158142c2	bellard	}
5266	158142c2	bellard	if ( aExp == 0 ) {
5267	158142c2	bellard	++expDiff;
5268	158142c2	bellard	}
5269	158142c2	bellard	else {
5270	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
5271	158142c2	bellard	}
5272	158142c2	bellard	shift128ExtraRightJamming(
5273	158142c2	bellard	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5274	158142c2	bellard	zExp = bExp;
5275	158142c2	bellard	}
5276	158142c2	bellard	else {
5277	158142c2	bellard	if ( aExp == 0x7FFF ) {
5278	158142c2	bellard	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
5279	158142c2	bellard	return propagateFloat128NaN( a, b STATUS_VAR );
5280	158142c2	bellard	}
5281	158142c2	bellard	return a;
5282	158142c2	bellard	}
5283	158142c2	bellard	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5284	fe76d976	pbrook	if ( aExp == 0 ) {
5285	fe76d976	pbrook	if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
5286	fe76d976	pbrook	return packFloat128( zSign, 0, zSig0, zSig1 );
5287	fe76d976	pbrook	}
5288	158142c2	bellard	zSig2 = 0;
5289	158142c2	bellard	zSig0 \|= LIT64( 0x0002000000000000 );
5290	158142c2	bellard	zExp = aExp;
5291	158142c2	bellard	goto shiftRight1;
5292	158142c2	bellard	}
5293	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
5294	158142c2	bellard	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5295	158142c2	bellard	--zExp;
5296	158142c2	bellard	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5297	158142c2	bellard	++zExp;
5298	158142c2	bellard	shiftRight1:
5299	158142c2	bellard	shift128ExtraRightJamming(
5300	158142c2	bellard	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5301	158142c2	bellard	roundAndPack:
5302	158142c2	bellard	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5303	158142c2	bellard
5304	158142c2	bellard	}
5305	158142c2	bellard
5306	158142c2	bellard	/*----------------------------------------------------------------------------
5307	158142c2	bellard	\| Returns the result of subtracting the absolute values of the quadruple-
5308	158142c2	bellard	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
5309	158142c2	bellard	\| difference is negated before being returned. `zSign' is ignored if the
5310	158142c2	bellard	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
5311	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
5312	158142c2	bellard	----------------------------------------------------------------------------/
5313	158142c2	bellard
5314	158142c2	bellard	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5315	158142c2	bellard	{
5316	158142c2	bellard	int32 aExp, bExp, zExp;
5317	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5318	158142c2	bellard	int32 expDiff;
5319	158142c2	bellard	float128 z;
5320	158142c2	bellard
5321	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5322	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5323	158142c2	bellard	aExp = extractFloat128Exp( a );
5324	158142c2	bellard	bSig1 = extractFloat128Frac1( b );
5325	158142c2	bellard	bSig0 = extractFloat128Frac0( b );
5326	158142c2	bellard	bExp = extractFloat128Exp( b );
5327	158142c2	bellard	expDiff = aExp - bExp;
5328	158142c2	bellard	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5329	158142c2	bellard	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5330	158142c2	bellard	if ( 0 < expDiff ) goto aExpBigger;
5331	158142c2	bellard	if ( expDiff < 0 ) goto bExpBigger;
5332	158142c2	bellard	if ( aExp == 0x7FFF ) {
5333	158142c2	bellard	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
5334	158142c2	bellard	return propagateFloat128NaN( a, b STATUS_VAR );
5335	158142c2	bellard	}
5336	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5337	158142c2	bellard	z.low = float128_default_nan_low;
5338	158142c2	bellard	z.high = float128_default_nan_high;
5339	158142c2	bellard	return z;
5340	158142c2	bellard	}
5341	158142c2	bellard	if ( aExp == 0 ) {
5342	158142c2	bellard	aExp = 1;
5343	158142c2	bellard	bExp = 1;
5344	158142c2	bellard	}
5345	158142c2	bellard	if ( bSig0 < aSig0 ) goto aBigger;
5346	158142c2	bellard	if ( aSig0 < bSig0 ) goto bBigger;
5347	158142c2	bellard	if ( bSig1 < aSig1 ) goto aBigger;
5348	158142c2	bellard	if ( aSig1 < bSig1 ) goto bBigger;
5349	158142c2	bellard	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5350	158142c2	bellard	bExpBigger:
5351	158142c2	bellard	if ( bExp == 0x7FFF ) {
5352	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5353	158142c2	bellard	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5354	158142c2	bellard	}
5355	158142c2	bellard	if ( aExp == 0 ) {
5356	158142c2	bellard	++expDiff;
5357	158142c2	bellard	}
5358	158142c2	bellard	else {
5359	158142c2	bellard	aSig0 \|= LIT64( 0x4000000000000000 );
5360	158142c2	bellard	}
5361	158142c2	bellard	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5362	158142c2	bellard	bSig0 \|= LIT64( 0x4000000000000000 );
5363	158142c2	bellard	bBigger:
5364	158142c2	bellard	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5365	158142c2	bellard	zExp = bExp;
5366	158142c2	bellard	zSign ^= 1;
5367	158142c2	bellard	goto normalizeRoundAndPack;
5368	158142c2	bellard	aExpBigger:
5369	158142c2	bellard	if ( aExp == 0x7FFF ) {
5370	158142c2	bellard	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5371	158142c2	bellard	return a;
5372	158142c2	bellard	}
5373	158142c2	bellard	if ( bExp == 0 ) {
5374	158142c2	bellard	--expDiff;
5375	158142c2	bellard	}
5376	158142c2	bellard	else {
5377	158142c2	bellard	bSig0 \|= LIT64( 0x4000000000000000 );
5378	158142c2	bellard	}
5379	158142c2	bellard	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5380	158142c2	bellard	aSig0 \|= LIT64( 0x4000000000000000 );
5381	158142c2	bellard	aBigger:
5382	158142c2	bellard	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5383	158142c2	bellard	zExp = aExp;
5384	158142c2	bellard	normalizeRoundAndPack:
5385	158142c2	bellard	--zExp;
5386	158142c2	bellard	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5387	158142c2	bellard
5388	158142c2	bellard	}
5389	158142c2	bellard
5390	158142c2	bellard	/*----------------------------------------------------------------------------
5391	158142c2	bellard	\| Returns the result of adding the quadruple-precision floating-point values
5392	158142c2	bellard	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
5393	158142c2	bellard	\| for Binary Floating-Point Arithmetic.
5394	158142c2	bellard	----------------------------------------------------------------------------/
5395	158142c2	bellard
5396	158142c2	bellard	float128 float128_add( float128 a, float128 b STATUS_PARAM )
5397	158142c2	bellard	{
5398	158142c2	bellard	flag aSign, bSign;
5399	158142c2	bellard
5400	158142c2	bellard	aSign = extractFloat128Sign( a );
5401	158142c2	bellard	bSign = extractFloat128Sign( b );
5402	158142c2	bellard	if ( aSign == bSign ) {
5403	158142c2	bellard	return addFloat128Sigs( a, b, aSign STATUS_VAR );
5404	158142c2	bellard	}
5405	158142c2	bellard	else {
5406	158142c2	bellard	return subFloat128Sigs( a, b, aSign STATUS_VAR );
5407	158142c2	bellard	}
5408	158142c2	bellard
5409	158142c2	bellard	}
5410	158142c2	bellard
5411	158142c2	bellard	/*----------------------------------------------------------------------------
5412	158142c2	bellard	\| Returns the result of subtracting the quadruple-precision floating-point
5413	158142c2	bellard	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
5414	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
5415	158142c2	bellard	----------------------------------------------------------------------------/
5416	158142c2	bellard
5417	158142c2	bellard	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5418	158142c2	bellard	{
5419	158142c2	bellard	flag aSign, bSign;
5420	158142c2	bellard
5421	158142c2	bellard	aSign = extractFloat128Sign( a );
5422	158142c2	bellard	bSign = extractFloat128Sign( b );
5423	158142c2	bellard	if ( aSign == bSign ) {
5424	158142c2	bellard	return subFloat128Sigs( a, b, aSign STATUS_VAR );
5425	158142c2	bellard	}
5426	158142c2	bellard	else {
5427	158142c2	bellard	return addFloat128Sigs( a, b, aSign STATUS_VAR );
5428	158142c2	bellard	}
5429	158142c2	bellard
5430	158142c2	bellard	}
5431	158142c2	bellard
5432	158142c2	bellard	/*----------------------------------------------------------------------------
5433	158142c2	bellard	\| Returns the result of multiplying the quadruple-precision floating-point
5434	158142c2	bellard	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
5435	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
5436	158142c2	bellard	----------------------------------------------------------------------------/
5437	158142c2	bellard
5438	158142c2	bellard	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5439	158142c2	bellard	{
5440	158142c2	bellard	flag aSign, bSign, zSign;
5441	158142c2	bellard	int32 aExp, bExp, zExp;
5442	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5443	158142c2	bellard	float128 z;
5444	158142c2	bellard
5445	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5446	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5447	158142c2	bellard	aExp = extractFloat128Exp( a );
5448	158142c2	bellard	aSign = extractFloat128Sign( a );
5449	158142c2	bellard	bSig1 = extractFloat128Frac1( b );
5450	158142c2	bellard	bSig0 = extractFloat128Frac0( b );
5451	158142c2	bellard	bExp = extractFloat128Exp( b );
5452	158142c2	bellard	bSign = extractFloat128Sign( b );
5453	158142c2	bellard	zSign = aSign ^ bSign;
5454	158142c2	bellard	if ( aExp == 0x7FFF ) {
5455	158142c2	bellard	if ( ( aSig0 \| aSig1 )
5456	158142c2	bellard	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
5457	158142c2	bellard	return propagateFloat128NaN( a, b STATUS_VAR );
5458	158142c2	bellard	}
5459	158142c2	bellard	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
5460	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
5461	158142c2	bellard	}
5462	158142c2	bellard	if ( bExp == 0x7FFF ) {
5463	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5464	158142c2	bellard	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
5465	158142c2	bellard	invalid:
5466	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5467	158142c2	bellard	z.low = float128_default_nan_low;
5468	158142c2	bellard	z.high = float128_default_nan_high;
5469	158142c2	bellard	return z;
5470	158142c2	bellard	}
5471	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
5472	158142c2	bellard	}
5473	158142c2	bellard	if ( aExp == 0 ) {
5474	158142c2	bellard	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5475	158142c2	bellard	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5476	158142c2	bellard	}
5477	158142c2	bellard	if ( bExp == 0 ) {
5478	158142c2	bellard	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5479	158142c2	bellard	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5480	158142c2	bellard	}
5481	158142c2	bellard	zExp = aExp + bExp - 0x4000;
5482	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
5483	158142c2	bellard	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5484	158142c2	bellard	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5485	158142c2	bellard	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5486	158142c2	bellard	zSig2 \|= ( zSig3 != 0 );
5487	158142c2	bellard	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5488	158142c2	bellard	shift128ExtraRightJamming(
5489	158142c2	bellard	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5490	158142c2	bellard	++zExp;
5491	158142c2	bellard	}
5492	158142c2	bellard	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5493	158142c2	bellard
5494	158142c2	bellard	}
5495	158142c2	bellard
5496	158142c2	bellard	/*----------------------------------------------------------------------------
5497	158142c2	bellard	| Returns the result of dividing the quadruple-precision floating-point value
5498	158142c2	bellard	| `a' by the corresponding value `b'. The operation is performed according to
5499	158142c2	bellard	| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5500	158142c2	bellard	*----------------------------------------------------------------------------*/
5501	158142c2	bellard
/* Quotient a / b.  Special operands (NaN, infinity, zero) are resolved first;
   otherwise the quotient significand is built from two 64-bit estimates, each
   corrected against its remainder, and the result is rounded and packed with
   the XOR of the operand signs. */
5502	158142c2	bellard	float128 float128_div( float128 a, float128 b STATUS_PARAM )
5503	158142c2	bellard	{
5504	158142c2	bellard	flag aSign, bSign, zSign;
5505	158142c2	bellard	int32 aExp, bExp, zExp;
5506	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5507	bb98fe42	Andreas Färber	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5508	158142c2	bellard	float128 z;
5509	158142c2	bellard
5510	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5511	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5512	158142c2	bellard	aExp = extractFloat128Exp( a );
5513	158142c2	bellard	aSign = extractFloat128Sign( a );
5514	158142c2	bellard	bSig1 = extractFloat128Frac1( b );
5515	158142c2	bellard	bSig0 = extractFloat128Frac0( b );
5516	158142c2	bellard	bExp = extractFloat128Exp( b );
5517	158142c2	bellard	bSign = extractFloat128Sign( b );
5518	158142c2	bellard	zSign = aSign ^ bSign;
/* a has the maximum exponent: NaN if its fraction is non-zero, else infinity. */
5519	158142c2	bellard	if ( aExp == 0x7FFF ) {
5520	158142c2	bellard	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5521	158142c2	bellard	if ( bExp == 0x7FFF ) {
5522	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
/* infinity / infinity */
5523	158142c2	bellard	goto invalid;
5524	158142c2	bellard	}
/* infinity / finite: signed infinity. */
5525	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
5526	158142c2	bellard	}
5527	158142c2	bellard	if ( bExp == 0x7FFF ) {
5528	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
/* finite / infinity: signed zero. */
5529	158142c2	bellard	return packFloat128( zSign, 0, 0, 0 );
5530	158142c2	bellard	}
5531	158142c2	bellard	if ( bExp == 0 ) {
5532	158142c2	bellard	if ( ( bSig0 \| bSig1 ) == 0 ) {
/* Divisor is zero: 0 / 0 is invalid (default NaN); any other dividend raises
   divbyzero and returns a signed infinity. */
5533	158142c2	bellard	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
/* Also the target of the infinity / infinity jump above. */
5534	158142c2	bellard	invalid:
5535	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5536	158142c2	bellard	z.low = float128_default_nan_low;
5537	158142c2	bellard	z.high = float128_default_nan_high;
5538	158142c2	bellard	return z;
5539	158142c2	bellard	}
5540	158142c2	bellard	float_raise( float_flag_divbyzero STATUS_VAR);
5541	158142c2	bellard	return packFloat128( zSign, 0x7FFF, 0, 0 );
5542	158142c2	bellard	}
/* Non-zero fraction with a zero exponent: the helper rewrites bExp and both
   significand words. */
5543	158142c2	bellard	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5544	158142c2	bellard	}
5545	158142c2	bellard	if ( aExp == 0 ) {
/* 0 / non-zero finite: signed zero. */
5546	158142c2	bellard	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5547	158142c2	bellard	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5548	158142c2	bellard	}
5549	158142c2	bellard	zExp = aExp - bExp + 0x3FFD;
/* Set the implicit integer bit of each significand and shift it left 15 bits. */
5550	158142c2	bellard	shortShift128Left(
5551	158142c2	bellard	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5552	158142c2	bellard	shortShift128Left(
5553	158142c2	bellard	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
/* When the divisor significand is <= the dividend significand, halve the
   dividend and compensate in the exponent -- presumably so the dividend is
   strictly smaller than the divisor before estimating; TODO confirm against
   estimateDiv128To64's preconditions. */
5554	158142c2	bellard	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5555	158142c2	bellard	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5556	158142c2	bellard	++zExp;
5557	158142c2	bellard	}
/* High quotient word: estimate it, form remainder = a - b * estimate, then
   step the estimate down (adding b back) while the remainder is negative. */
5558	158142c2	bellard	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5559	158142c2	bellard	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5560	158142c2	bellard	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5561	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
5562	158142c2	bellard	--zSig0;
5563	158142c2	bellard	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5564	158142c2	bellard	}
/* Low quotient word, estimated from the remaining remainder. */
5565	158142c2	bellard	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
/* The exact remainder is computed only when the low 14 bits of the estimate
   are <= 4; the estimate is then corrected the same way as above and a
   non-zero final remainder is recorded in bit 0 of zSig1 (sticky bit). */
5566	158142c2	bellard	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5567	158142c2	bellard	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5568	158142c2	bellard	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5569	bb98fe42	Andreas Färber	while ( (int64_t) rem1 < 0 ) {
5570	158142c2	bellard	--zSig1;
5571	158142c2	bellard	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5572	158142c2	bellard	}
5573	158142c2	bellard	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
5574	158142c2	bellard	}
/* Shift the 128-bit quotient right by 15; the helper also produces the extra
   word zSig2 that is handed to the rounding routine. */
5575	158142c2	bellard	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5576	158142c2	bellard	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5577	158142c2	bellard
5578	158142c2	bellard	}
5579	158142c2	bellard
5580	158142c2	bellard	/*----------------------------------------------------------------------------
5581	158142c2	bellard	| Returns the remainder of the quadruple-precision floating-point value `a'
5582	158142c2	bellard	| with respect to the corresponding value `b'. The operation is performed
5583	158142c2	bellard	| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5584	158142c2	bellard	*----------------------------------------------------------------------------*/
5585	158142c2	bellard
/* Remainder of a with respect to b.  Special operands are resolved first; the
   significand of a is then reduced against the significand of b in chunks,
   and finally the candidate remainder of smaller magnitude is chosen (ties
   are decided by the parity of the quotient counter q). */
5586	158142c2	bellard	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5587	158142c2	bellard	{
5588	ed086f3d	Blue Swirl	flag aSign, zSign;
5589	158142c2	bellard	int32 aExp, bExp, expDiff;
5590	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5591	bb98fe42	Andreas Färber	uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5592	bb98fe42	Andreas Färber	int64_t sigMean0;
5593	158142c2	bellard	float128 z;
5594	158142c2	bellard
5595	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5596	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5597	158142c2	bellard	aExp = extractFloat128Exp( a );
5598	158142c2	bellard	aSign = extractFloat128Sign( a );
5599	158142c2	bellard	bSig1 = extractFloat128Frac1( b );
5600	158142c2	bellard	bSig0 = extractFloat128Frac0( b );
5601	158142c2	bellard	bExp = extractFloat128Exp( b );
/* a is NaN or infinity: propagate a NaN from either operand, otherwise the
   remainder of an infinity is invalid. */
5602	158142c2	bellard	if ( aExp == 0x7FFF ) {
5603	158142c2	bellard	if ( ( aSig0 \| aSig1 )
5604	158142c2	bellard	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
5605	158142c2	bellard	return propagateFloat128NaN( a, b STATUS_VAR );
5606	158142c2	bellard	}
5607	158142c2	bellard	goto invalid;
5608	158142c2	bellard	}
5609	158142c2	bellard	if ( bExp == 0x7FFF ) {
5610	158142c2	bellard	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
/* finite a, infinite b: a is returned unchanged. */
5611	158142c2	bellard	return a;
5612	158142c2	bellard	}
5613	158142c2	bellard	if ( bExp == 0 ) {
5614	158142c2	bellard	if ( ( bSig0 \| bSig1 ) == 0 ) {
/* b is zero (also reached for infinite a): invalid, default NaN. */
5615	158142c2	bellard	invalid:
5616	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5617	158142c2	bellard	z.low = float128_default_nan_low;
5618	158142c2	bellard	z.high = float128_default_nan_high;
5619	158142c2	bellard	return z;
5620	158142c2	bellard	}
5621	158142c2	bellard	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5622	158142c2	bellard	}
5623	158142c2	bellard	if ( aExp == 0 ) {
/* a is zero: returned unchanged. */
5624	158142c2	bellard	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
5625	158142c2	bellard	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5626	158142c2	bellard	}
5627	158142c2	bellard	expDiff = aExp - bExp;
/* a's exponent is more than one below b's: a is already the remainder. */
5628	158142c2	bellard	if ( expDiff < -1 ) return a;
/* Set the implicit integer bits and left-align the significands; a is shifted
   one bit less (14 instead of 15) when its exponent is one below b's. */
5629	158142c2	bellard	shortShift128Left(
5630	158142c2	bellard	aSig0 \| LIT64( 0x0001000000000000 ),
5631	158142c2	bellard	aSig1,
5632	158142c2	bellard	15 - ( expDiff < 0 ),
5633	158142c2	bellard	&aSig0,
5634	158142c2	bellard	&aSig1
5635	158142c2	bellard	);
5636	158142c2	bellard	shortShift128Left(
5637	158142c2	bellard	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
/* q starts as 1 when b's significand <= a's (one subtraction performed), else 0. */
5638	158142c2	bellard	q = le128( bSig0, bSig1, aSig0, aSig1 );
5639	158142c2	bellard	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5640	158142c2	bellard	expDiff -= 64;
/* Bulk reduction: each pass lowers expDiff by 61.  The quotient estimate is
   backed off by 4 (clamped at zero) -- presumably so it never exceeds the
   true quotient; TODO confirm against estimateDiv128To64. */
5641	158142c2	bellard	while ( 0 < expDiff ) {
5642	158142c2	bellard	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5643	158142c2	bellard	q = ( 4 < q ) ? q - 4 : 0;
5644	158142c2	bellard	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5645	158142c2	bellard	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5646	158142c2	bellard	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5647	158142c2	bellard	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5648	158142c2	bellard	expDiff -= 61;
5649	158142c2	bellard	}
/* Final partial step: here expDiff <= 0, so the estimate is shifted right by
   -expDiff (0..63) to keep only the quotient bits that are still needed. */
5650	158142c2	bellard	if ( -64 < expDiff ) {
5651	158142c2	bellard	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5652	158142c2	bellard	q = ( 4 < q ) ? q - 4 : 0;
5653	158142c2	bellard	q >>= - expDiff;
5654	158142c2	bellard	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5655	158142c2	bellard	expDiff += 52;
5656	158142c2	bellard	if ( expDiff < 0 ) {
5657	158142c2	bellard	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5658	158142c2	bellard	}
5659	158142c2	bellard	else {
5660	158142c2	bellard	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5661	158142c2	bellard	}
5662	158142c2	bellard	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5663	158142c2	bellard	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5664	158142c2	bellard	}
5665	158142c2	bellard	else {
/* No estimate step: just shift both significands right by 12. */
5666	158142c2	bellard	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5667	158142c2	bellard	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5668	158142c2	bellard	}
/* Subtract b until the remainder turns negative, counting in q and keeping
   the value from before the last subtraction in alternateASig0/1. */
5669	158142c2	bellard	do {
5670	158142c2	bellard	alternateASig0 = aSig0;
5671	158142c2	bellard	alternateASig1 = aSig1;
5672	158142c2	bellard	++q;
5673	158142c2	bellard	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5674	bb98fe42	Andreas Färber	} while ( 0 <= (int64_t) aSig0 );
/* sigMean is the sum of the two candidates (the negative one and the last
   non-negative one).  A negative sum, or a zero sum with q odd, selects the
   saved non-negative candidate; otherwise the negative one is kept. */
5675	158142c2	bellard	add128(
5676	bb98fe42	Andreas Färber	aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
5677	158142c2	bellard	if ( ( sigMean0 < 0 )
5678	158142c2	bellard	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5679	158142c2	bellard	aSig0 = alternateASig0;
5680	158142c2	bellard	aSig1 = alternateASig1;
5681	158142c2	bellard	}
/* Take the magnitude of the chosen remainder; its sign flips the sign of a. */
5682	bb98fe42	Andreas Färber	zSign = ( (int64_t) aSig0 < 0 );
5683	158142c2	bellard	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5684	158142c2	bellard	return
5685	158142c2	bellard	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5686	158142c2	bellard
5687	158142c2	bellard	}
5688	158142c2	bellard
5689	158142c2	bellard	/*----------------------------------------------------------------------------
5690	158142c2	bellard	| Returns the square root of the quadruple-precision floating-point value `a'.
5691	158142c2	bellard	| The operation is performed according to the IEC/IEEE Standard for Binary
5692	158142c2	bellard	| Floating-Point Arithmetic.
5693	158142c2	bellard	*----------------------------------------------------------------------------*/
5694	158142c2	bellard
/* Square root of a.  NaN, infinity, negative and zero operands are resolved
   first; otherwise the root significand is built from an initial estimate
   that is refined and corrected against its remainder in two 64-bit steps.
   The result is always packed with sign 0. */
5695	158142c2	bellard	float128 float128_sqrt( float128 a STATUS_PARAM )
5696	158142c2	bellard	{
5697	158142c2	bellard	flag aSign;
5698	158142c2	bellard	int32 aExp, zExp;
5699	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5700	bb98fe42	Andreas Färber	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5701	158142c2	bellard	float128 z;
5702	158142c2	bellard
5703	158142c2	bellard	aSig1 = extractFloat128Frac1( a );
5704	158142c2	bellard	aSig0 = extractFloat128Frac0( a );
5705	158142c2	bellard	aExp = extractFloat128Exp( a );
5706	158142c2	bellard	aSign = extractFloat128Sign( a );
/* Maximum exponent: propagate a NaN, return +infinity unchanged, and treat
   -infinity as invalid. */
5707	158142c2	bellard	if ( aExp == 0x7FFF ) {
5708	158142c2	bellard	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5709	158142c2	bellard	if ( ! aSign ) return a;
5710	158142c2	bellard	goto invalid;
5711	158142c2	bellard	}
5712	158142c2	bellard	if ( aSign ) {
/* -0 is returned unchanged; any other negative operand is invalid. */
5713	158142c2	bellard	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
5714	158142c2	bellard	invalid:
5715	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5716	158142c2	bellard	z.low = float128_default_nan_low;
5717	158142c2	bellard	z.high = float128_default_nan_high;
5718	158142c2	bellard	return z;
5719	158142c2	bellard	}
5720	158142c2	bellard	if ( aExp == 0 ) {
/* +0 gives +0; a subnormal is normalized (the helper rewrites aExp and the
   significand words). */
5721	158142c2	bellard	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5722	158142c2	bellard	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5723	158142c2	bellard	}
/* Halve the exponent's offset from 0x3FFF and re-base it on 0x3FFE. */
5724	158142c2	bellard	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5725	158142c2	bellard	aSig0 \|= LIT64( 0x0001000000000000 );
/* Initial estimate from the exponent and the top significand bits, refined
   into the high root word with one division estimate.  The significand is
   shifted left by 13, or by 12 when aExp is odd. */
5726	158142c2	bellard	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5727	158142c2	bellard	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5728	158142c2	bellard	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5729	158142c2	bellard	doubleZSig0 = zSig0<<1;
/* remainder = a - zSig0^2; while it is negative, step zSig0 down by one and
   add back the corresponding amount. */
5730	158142c2	bellard	mul64To128( zSig0, zSig0, &term0, &term1 );
5731	158142c2	bellard	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5732	bb98fe42	Andreas Färber	while ( (int64_t) rem0 < 0 ) {
5733	158142c2	bellard	--zSig0;
5734	158142c2	bellard	doubleZSig0 -= 2;
5735	158142c2	bellard	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
5736	158142c2	bellard	}
/* Low root word, estimated from the remainder and twice the high word. */
5737	158142c2	bellard	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
/* The exact remainder is computed only when the low 13 bits of the estimate
   are <= 5; the estimate is then corrected and a non-zero final remainder is
   recorded in bit 0 of zSig1 (sticky bit). */
5738	158142c2	bellard	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5739	158142c2	bellard	if ( zSig1 == 0 ) zSig1 = 1;
5740	158142c2	bellard	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5741	158142c2	bellard	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5742	158142c2	bellard	mul64To128( zSig1, zSig1, &term2, &term3 );
5743	158142c2	bellard	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5744	bb98fe42	Andreas Färber	while ( (int64_t) rem1 < 0 ) {
5745	158142c2	bellard	--zSig1;
5746	158142c2	bellard	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5747	158142c2	bellard	term3 \|= 1;
5748	158142c2	bellard	term2 \|= doubleZSig0;
5749	158142c2	bellard	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5750	158142c2	bellard	}
5751	158142c2	bellard	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
5752	158142c2	bellard	}
/* Shift the 128-bit root right by 14; the helper also produces the extra word
   zSig2 that is handed to the rounding routine. */
5753	158142c2	bellard	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5754	158142c2	bellard	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5755	158142c2	bellard
5756	158142c2	bellard	}
5757	158142c2	bellard
5758	158142c2	bellard	/*----------------------------------------------------------------------------
5759	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5760	b689362d	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. The invalid exception is
5761	b689362d	Aurelien Jarno	\| raised if either operand is a NaN. Otherwise, the comparison is performed
5762	158142c2	bellard	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5763	158142c2	bellard	----------------------------------------------------------------------------/
5764	158142c2	bellard
5765	b689362d	Aurelien Jarno	int float128_eq( float128 a, float128 b STATUS_PARAM )
5766	158142c2	bellard	{
5767	158142c2	bellard
5768	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5769	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5770	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5771	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5772	158142c2	bellard	) {
5773	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
5774	158142c2	bellard	return 0;
5775	158142c2	bellard	}
5776	158142c2	bellard	return
5777	158142c2	bellard	( a.low == b.low )
5778	158142c2	bellard	&& ( ( a.high == b.high )
5779	158142c2	bellard	\|\| ( ( a.low == 0 )
5780	bb98fe42	Andreas Färber	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
5781	158142c2	bellard	);
5782	158142c2	bellard
5783	158142c2	bellard	}
5784	158142c2	bellard
5785	158142c2	bellard	/*----------------------------------------------------------------------------
5786	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
5787	f5a64251	Aurelien Jarno	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
5788	f5a64251	Aurelien Jarno	\| exception is raised if either operand is a NaN. The comparison is performed
5789	f5a64251	Aurelien Jarno	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5790	158142c2	bellard	----------------------------------------------------------------------------/
5791	158142c2	bellard
5792	750afe93	bellard	int float128_le( float128 a, float128 b STATUS_PARAM )
5793	158142c2	bellard	{
5794	158142c2	bellard	flag aSign, bSign;
5795	158142c2	bellard
5796	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5797	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5798	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5799	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5800	158142c2	bellard	) {
5801	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5802	158142c2	bellard	return 0;
5803	158142c2	bellard	}
5804	158142c2	bellard	aSign = extractFloat128Sign( a );
5805	158142c2	bellard	bSign = extractFloat128Sign( b );
5806	158142c2	bellard	if ( aSign != bSign ) {
5807	158142c2	bellard	return
5808	158142c2	bellard	aSign
5809	bb98fe42	Andreas Färber	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
5810	158142c2	bellard	== 0 );
5811	158142c2	bellard	}
5812	158142c2	bellard	return
5813	158142c2	bellard	aSign ? le128( b.high, b.low, a.high, a.low )
5814	158142c2	bellard	: le128( a.high, a.low, b.high, b.low );
5815	158142c2	bellard
5816	158142c2	bellard	}
5817	158142c2	bellard
5818	158142c2	bellard	/*----------------------------------------------------------------------------
5819	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
5820	f5a64251	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. The invalid exception is
5821	f5a64251	Aurelien Jarno	\| raised if either operand is a NaN. The comparison is performed according
5822	f5a64251	Aurelien Jarno	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5823	158142c2	bellard	----------------------------------------------------------------------------/
5824	158142c2	bellard
5825	750afe93	bellard	int float128_lt( float128 a, float128 b STATUS_PARAM )
5826	158142c2	bellard	{
5827	158142c2	bellard	flag aSign, bSign;
5828	158142c2	bellard
5829	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5830	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5831	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5832	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5833	158142c2	bellard	) {
5834	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5835	158142c2	bellard	return 0;
5836	158142c2	bellard	}
5837	158142c2	bellard	aSign = extractFloat128Sign( a );
5838	158142c2	bellard	bSign = extractFloat128Sign( b );
5839	158142c2	bellard	if ( aSign != bSign ) {
5840	158142c2	bellard	return
5841	158142c2	bellard	aSign
5842	bb98fe42	Andreas Färber	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
5843	158142c2	bellard	!= 0 );
5844	158142c2	bellard	}
5845	158142c2	bellard	return
5846	158142c2	bellard	aSign ? lt128( b.high, b.low, a.high, a.low )
5847	158142c2	bellard	: lt128( a.high, a.low, b.high, b.low );
5848	158142c2	bellard
5849	158142c2	bellard	}
5850	158142c2	bellard
5851	158142c2	bellard	/*----------------------------------------------------------------------------
5852	67b7861d	Aurelien Jarno	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5853	f5a64251	Aurelien Jarno	\| be compared, and 0 otherwise. The invalid exception is raised if either
5854	f5a64251	Aurelien Jarno	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
5855	f5a64251	Aurelien Jarno	\| Standard for Binary Floating-Point Arithmetic.
5856	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
5857	67b7861d	Aurelien Jarno
5858	67b7861d	Aurelien Jarno	int float128_unordered( float128 a, float128 b STATUS_PARAM )
5859	67b7861d	Aurelien Jarno	{
5860	67b7861d	Aurelien Jarno	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5861	67b7861d	Aurelien Jarno	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5862	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5863	67b7861d	Aurelien Jarno	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5864	67b7861d	Aurelien Jarno	) {
5865	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
5866	67b7861d	Aurelien Jarno	return 1;
5867	67b7861d	Aurelien Jarno	}
5868	67b7861d	Aurelien Jarno	return 0;
5869	67b7861d	Aurelien Jarno	}
5870	67b7861d	Aurelien Jarno
5871	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
5872	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5873	f5a64251	Aurelien Jarno	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5874	f5a64251	Aurelien Jarno	\| exception. The comparison is performed according to the IEC/IEEE Standard
5875	f5a64251	Aurelien Jarno	\| for Binary Floating-Point Arithmetic.
5876	158142c2	bellard	----------------------------------------------------------------------------/
5877	158142c2	bellard
5878	b689362d	Aurelien Jarno	int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
5879	158142c2	bellard	{
5880	158142c2	bellard
5881	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5882	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5883	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5884	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5885	158142c2	bellard	) {
5886	b689362d	Aurelien Jarno	if ( float128_is_signaling_nan( a )
5887	b689362d	Aurelien Jarno	\|\| float128_is_signaling_nan( b ) ) {
5888	b689362d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
5889	b689362d	Aurelien Jarno	}
5890	158142c2	bellard	return 0;
5891	158142c2	bellard	}
5892	158142c2	bellard	return
5893	158142c2	bellard	( a.low == b.low )
5894	158142c2	bellard	&& ( ( a.high == b.high )
5895	158142c2	bellard	\|\| ( ( a.low == 0 )
5896	bb98fe42	Andreas Färber	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
5897	158142c2	bellard	);
5898	158142c2	bellard
5899	158142c2	bellard	}
5900	158142c2	bellard
5901	158142c2	bellard	/*----------------------------------------------------------------------------
5902	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
5903	158142c2	bellard	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5904	158142c2	bellard	\| cause an exception. Otherwise, the comparison is performed according to the
5905	158142c2	bellard	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5906	158142c2	bellard	----------------------------------------------------------------------------/
5907	158142c2	bellard
5908	750afe93	bellard	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5909	158142c2	bellard	{
5910	158142c2	bellard	flag aSign, bSign;
5911	158142c2	bellard
5912	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5913	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5914	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5915	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5916	158142c2	bellard	) {
5917	158142c2	bellard	if ( float128_is_signaling_nan( a )
5918	158142c2	bellard	\|\| float128_is_signaling_nan( b ) ) {
5919	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5920	158142c2	bellard	}
5921	158142c2	bellard	return 0;
5922	158142c2	bellard	}
5923	158142c2	bellard	aSign = extractFloat128Sign( a );
5924	158142c2	bellard	bSign = extractFloat128Sign( b );
5925	158142c2	bellard	if ( aSign != bSign ) {
5926	158142c2	bellard	return
5927	158142c2	bellard	aSign
5928	bb98fe42	Andreas Färber	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
5929	158142c2	bellard	== 0 );
5930	158142c2	bellard	}
5931	158142c2	bellard	return
5932	158142c2	bellard	aSign ? le128( b.high, b.low, a.high, a.low )
5933	158142c2	bellard	: le128( a.high, a.low, b.high, b.low );
5934	158142c2	bellard
5935	158142c2	bellard	}
5936	158142c2	bellard
5937	158142c2	bellard	/*----------------------------------------------------------------------------
5938	158142c2	bellard	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
5939	158142c2	bellard	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5940	158142c2	bellard	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5941	158142c2	bellard	\| Standard for Binary Floating-Point Arithmetic.
5942	158142c2	bellard	----------------------------------------------------------------------------/
5943	158142c2	bellard
5944	750afe93	bellard	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5945	158142c2	bellard	{
5946	158142c2	bellard	flag aSign, bSign;
5947	158142c2	bellard
5948	158142c2	bellard	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5949	158142c2	bellard	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5950	158142c2	bellard	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5951	158142c2	bellard	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5952	158142c2	bellard	) {
5953	158142c2	bellard	if ( float128_is_signaling_nan( a )
5954	158142c2	bellard	\|\| float128_is_signaling_nan( b ) ) {
5955	158142c2	bellard	float_raise( float_flag_invalid STATUS_VAR);
5956	158142c2	bellard	}
5957	158142c2	bellard	return 0;
5958	158142c2	bellard	}
5959	158142c2	bellard	aSign = extractFloat128Sign( a );
5960	158142c2	bellard	bSign = extractFloat128Sign( b );
5961	158142c2	bellard	if ( aSign != bSign ) {
5962	158142c2	bellard	return
5963	158142c2	bellard	aSign
5964	bb98fe42	Andreas Färber	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
5965	158142c2	bellard	!= 0 );
5966	158142c2	bellard	}
5967	158142c2	bellard	return
5968	158142c2	bellard	aSign ? lt128( b.high, b.low, a.high, a.low )
5969	158142c2	bellard	: lt128( a.high, a.low, b.high, b.low );
5970	158142c2	bellard
5971	158142c2	bellard	}
5972	158142c2	bellard
5973	67b7861d	Aurelien Jarno	/*----------------------------------------------------------------------------
5974	67b7861d	Aurelien Jarno	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5975	67b7861d	Aurelien Jarno	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5976	67b7861d	Aurelien Jarno	\| comparison is performed according to the IEC/IEEE Standard for Binary
5977	67b7861d	Aurelien Jarno	\| Floating-Point Arithmetic.
5978	67b7861d	Aurelien Jarno	----------------------------------------------------------------------------/
5979	67b7861d	Aurelien Jarno
5980	67b7861d	Aurelien Jarno	int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
5981	67b7861d	Aurelien Jarno	{
5982	67b7861d	Aurelien Jarno	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5983	67b7861d	Aurelien Jarno	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
5984	67b7861d	Aurelien Jarno	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
5985	67b7861d	Aurelien Jarno	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
5986	67b7861d	Aurelien Jarno	) {
5987	67b7861d	Aurelien Jarno	if ( float128_is_signaling_nan( a )
5988	67b7861d	Aurelien Jarno	\|\| float128_is_signaling_nan( b ) ) {
5989	67b7861d	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
5990	67b7861d	Aurelien Jarno	}
5991	67b7861d	Aurelien Jarno	return 1;
5992	67b7861d	Aurelien Jarno	}
5993	67b7861d	Aurelien Jarno	return 0;
5994	67b7861d	Aurelien Jarno	}
5995	67b7861d	Aurelien Jarno
5996	158142c2	bellard	#endif
5997	158142c2	bellard
5998	1d6bda35	bellard	/* misc functions */
5999	1d6bda35	bellard	float32 uint32_to_float32( unsigned int a STATUS_PARAM )
6000	1d6bda35	bellard	{
6001	1d6bda35	bellard	return int64_to_float32(a STATUS_VAR);
6002	1d6bda35	bellard	}
6003	1d6bda35	bellard
6004	1d6bda35	bellard	float64 uint32_to_float64( unsigned int a STATUS_PARAM )
6005	1d6bda35	bellard	{
6006	1d6bda35	bellard	return int64_to_float64(a STATUS_VAR);
6007	1d6bda35	bellard	}
6008	1d6bda35	bellard
6009	1d6bda35	bellard	unsigned int float32_to_uint32( float32 a STATUS_PARAM )
6010	1d6bda35	bellard	{
6011	1d6bda35	bellard	int64_t v;
6012	1d6bda35	bellard	unsigned int res;
6013	1d6bda35	bellard
6014	1d6bda35	bellard	v = float32_to_int64(a STATUS_VAR);
6015	1d6bda35	bellard	if (v < 0) {
6016	1d6bda35	bellard	res = 0;
6017	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6018	1d6bda35	bellard	} else if (v > 0xffffffff) {
6019	1d6bda35	bellard	res = 0xffffffff;
6020	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6021	1d6bda35	bellard	} else {
6022	1d6bda35	bellard	res = v;
6023	1d6bda35	bellard	}
6024	1d6bda35	bellard	return res;
6025	1d6bda35	bellard	}
6026	1d6bda35	bellard
6027	1d6bda35	bellard	unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
6028	1d6bda35	bellard	{
6029	1d6bda35	bellard	int64_t v;
6030	1d6bda35	bellard	unsigned int res;
6031	1d6bda35	bellard
6032	1d6bda35	bellard	v = float32_to_int64_round_to_zero(a STATUS_VAR);
6033	1d6bda35	bellard	if (v < 0) {
6034	1d6bda35	bellard	res = 0;
6035	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6036	1d6bda35	bellard	} else if (v > 0xffffffff) {
6037	1d6bda35	bellard	res = 0xffffffff;
6038	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6039	1d6bda35	bellard	} else {
6040	1d6bda35	bellard	res = v;
6041	1d6bda35	bellard	}
6042	1d6bda35	bellard	return res;
6043	1d6bda35	bellard	}
6044	1d6bda35	bellard
6045	cbcef455	Peter Maydell	unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
6046	cbcef455	Peter Maydell	{
6047	cbcef455	Peter Maydell	int64_t v;
6048	cbcef455	Peter Maydell	unsigned int res;
6049	cbcef455	Peter Maydell
6050	cbcef455	Peter Maydell	v = float32_to_int64_round_to_zero(a STATUS_VAR);
6051	cbcef455	Peter Maydell	if (v < 0) {
6052	cbcef455	Peter Maydell	res = 0;
6053	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
6054	cbcef455	Peter Maydell	} else if (v > 0xffff) {
6055	cbcef455	Peter Maydell	res = 0xffff;
6056	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
6057	cbcef455	Peter Maydell	} else {
6058	cbcef455	Peter Maydell	res = v;
6059	cbcef455	Peter Maydell	}
6060	cbcef455	Peter Maydell	return res;
6061	cbcef455	Peter Maydell	}
6062	cbcef455	Peter Maydell
6063	1d6bda35	bellard	unsigned int float64_to_uint32( float64 a STATUS_PARAM )
6064	1d6bda35	bellard	{
6065	1d6bda35	bellard	int64_t v;
6066	1d6bda35	bellard	unsigned int res;
6067	1d6bda35	bellard
6068	1d6bda35	bellard	v = float64_to_int64(a STATUS_VAR);
6069	1d6bda35	bellard	if (v < 0) {
6070	1d6bda35	bellard	res = 0;
6071	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6072	1d6bda35	bellard	} else if (v > 0xffffffff) {
6073	1d6bda35	bellard	res = 0xffffffff;
6074	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6075	1d6bda35	bellard	} else {
6076	1d6bda35	bellard	res = v;
6077	1d6bda35	bellard	}
6078	1d6bda35	bellard	return res;
6079	1d6bda35	bellard	}
6080	1d6bda35	bellard
6081	1d6bda35	bellard	unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
6082	1d6bda35	bellard	{
6083	1d6bda35	bellard	int64_t v;
6084	1d6bda35	bellard	unsigned int res;
6085	1d6bda35	bellard
6086	1d6bda35	bellard	v = float64_to_int64_round_to_zero(a STATUS_VAR);
6087	1d6bda35	bellard	if (v < 0) {
6088	1d6bda35	bellard	res = 0;
6089	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6090	1d6bda35	bellard	} else if (v > 0xffffffff) {
6091	1d6bda35	bellard	res = 0xffffffff;
6092	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR);
6093	1d6bda35	bellard	} else {
6094	1d6bda35	bellard	res = v;
6095	1d6bda35	bellard	}
6096	1d6bda35	bellard	return res;
6097	1d6bda35	bellard	}
6098	1d6bda35	bellard
6099	cbcef455	Peter Maydell	unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
6100	cbcef455	Peter Maydell	{
6101	cbcef455	Peter Maydell	int64_t v;
6102	cbcef455	Peter Maydell	unsigned int res;
6103	cbcef455	Peter Maydell
6104	cbcef455	Peter Maydell	v = float64_to_int64_round_to_zero(a STATUS_VAR);
6105	cbcef455	Peter Maydell	if (v < 0) {
6106	cbcef455	Peter Maydell	res = 0;
6107	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
6108	cbcef455	Peter Maydell	} else if (v > 0xffff) {
6109	cbcef455	Peter Maydell	res = 0xffff;
6110	cbcef455	Peter Maydell	float_raise( float_flag_invalid STATUS_VAR);
6111	cbcef455	Peter Maydell	} else {
6112	cbcef455	Peter Maydell	res = v;
6113	cbcef455	Peter Maydell	}
6114	cbcef455	Peter Maydell	return res;
6115	cbcef455	Peter Maydell	}
6116	cbcef455	Peter Maydell
6117	f090c9d4	pbrook	/* FIXME: This looks broken. */
6118	75d62a58	j_mayer	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6119	75d62a58	j_mayer	{
6120	75d62a58	j_mayer	int64_t v;
6121	75d62a58	j_mayer
6122	f090c9d4	pbrook	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6123	f090c9d4	pbrook	v += float64_val(a);
6124	f090c9d4	pbrook	v = float64_to_int64(make_float64(v) STATUS_VAR);
6125	75d62a58	j_mayer
6126	75d62a58	j_mayer	return v - INT64_MIN;
6127	75d62a58	j_mayer	}
6128	75d62a58	j_mayer
6129	75d62a58	j_mayer	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
6130	75d62a58	j_mayer	{
6131	75d62a58	j_mayer	int64_t v;
6132	75d62a58	j_mayer
6133	f090c9d4	pbrook	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6134	f090c9d4	pbrook	v += float64_val(a);
6135	f090c9d4	pbrook	v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
6136	75d62a58	j_mayer
6137	75d62a58	j_mayer	return v - INT64_MIN;
6138	75d62a58	j_mayer	}
6139	75d62a58	j_mayer
6140	1d6bda35	bellard	#define COMPARE(s, nan_exp) \
6141	750afe93	bellard	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
6142	1d6bda35	bellard	int is_quiet STATUS_PARAM ) \
6143	1d6bda35	bellard	{ \
6144	1d6bda35	bellard	flag aSign, bSign; \
6145	bb98fe42	Andreas Färber	uint ## s ## _t av, bv; \
6146	37d18660	Peter Maydell	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6147	37d18660	Peter Maydell	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
6148	1d6bda35	bellard	\
6149	1d6bda35	bellard	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
6150	1d6bda35	bellard	extractFloat ## s ## Frac( a ) ) \|\| \
6151	1d6bda35	bellard	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
6152	1d6bda35	bellard	extractFloat ## s ## Frac( b ) )) { \
6153	1d6bda35	bellard	if (!is_quiet \|\| \
6154	1d6bda35	bellard	float ## s ## _is_signaling_nan( a ) \|\| \
6155	1d6bda35	bellard	float ## s ## _is_signaling_nan( b ) ) { \
6156	1d6bda35	bellard	float_raise( float_flag_invalid STATUS_VAR); \
6157	1d6bda35	bellard	} \
6158	1d6bda35	bellard	return float_relation_unordered; \
6159	1d6bda35	bellard	} \
6160	1d6bda35	bellard	aSign = extractFloat ## s ## Sign( a ); \
6161	1d6bda35	bellard	bSign = extractFloat ## s ## Sign( b ); \
6162	f090c9d4	pbrook	av = float ## s ## _val(a); \
6163	cd8a2533	blueswir1	bv = float ## s ## _val(b); \
6164	1d6bda35	bellard	if ( aSign != bSign ) { \
6165	bb98fe42	Andreas Färber	if ( (uint ## s ## _t) ( ( av \| bv )<<1 ) == 0 ) { \
6166	1d6bda35	bellard	/* zero case */ \
6167	1d6bda35	bellard	return float_relation_equal; \
6168	1d6bda35	bellard	} else { \
6169	1d6bda35	bellard	return 1 - (2 * aSign); \
6170	1d6bda35	bellard	} \
6171	1d6bda35	bellard	} else { \
6172	f090c9d4	pbrook	if (av == bv) { \
6173	1d6bda35	bellard	return float_relation_equal; \
6174	1d6bda35	bellard	} else { \
6175	f090c9d4	pbrook	return 1 - 2 * (aSign ^ ( av < bv )); \
6176	1d6bda35	bellard	} \
6177	1d6bda35	bellard	} \
6178	1d6bda35	bellard	} \
6179	1d6bda35	bellard	\
6180	750afe93	bellard	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
6181	1d6bda35	bellard	{ \
6182	1d6bda35	bellard	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
6183	1d6bda35	bellard	} \
6184	1d6bda35	bellard	\
6185	750afe93	bellard	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
6186	1d6bda35	bellard	{ \
6187	1d6bda35	bellard	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
6188	1d6bda35	bellard	}
6189	1d6bda35	bellard
6190	1d6bda35	bellard	COMPARE(32, 0xff)
6191	1d6bda35	bellard	COMPARE(64, 0x7ff)
6192	9ee6e8bb	pbrook
6193	f6714d36	Aurelien Jarno	INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6194	f6714d36	Aurelien Jarno	int is_quiet STATUS_PARAM )
6195	f6714d36	Aurelien Jarno	{
6196	f6714d36	Aurelien Jarno	flag aSign, bSign;
6197	f6714d36	Aurelien Jarno
6198	f6714d36	Aurelien Jarno	if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6199	f6714d36	Aurelien Jarno	( extractFloatx80Frac( a )<<1 ) ) \|\|
6200	f6714d36	Aurelien Jarno	( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6201	f6714d36	Aurelien Jarno	( extractFloatx80Frac( b )<<1 ) )) {
6202	f6714d36	Aurelien Jarno	if (!is_quiet \|\|
6203	f6714d36	Aurelien Jarno	floatx80_is_signaling_nan( a ) \|\|
6204	f6714d36	Aurelien Jarno	floatx80_is_signaling_nan( b ) ) {
6205	f6714d36	Aurelien Jarno	float_raise( float_flag_invalid STATUS_VAR);
6206	f6714d36	Aurelien Jarno	}
6207	f6714d36	Aurelien Jarno	return float_relation_unordered;
6208	f6714d36	Aurelien Jarno	}
6209	f6714d36	Aurelien Jarno	aSign = extractFloatx80Sign( a );
6210	f6714d36	Aurelien Jarno	bSign = extractFloatx80Sign( b );
6211	f6714d36	Aurelien Jarno	if ( aSign != bSign ) {
6212	f6714d36	Aurelien Jarno
6213	f6714d36	Aurelien Jarno	if ( ( ( (uint16_t) ( ( a.high \| b.high ) << 1 ) ) == 0) &&
6214	f6714d36	Aurelien Jarno	( ( a.low \| b.low ) == 0 ) ) {
6215	f6714d36	Aurelien Jarno	/* zero case */
6216	f6714d36	Aurelien Jarno	return float_relation_equal;
6217	f6714d36	Aurelien Jarno	} else {
6218	f6714d36	Aurelien Jarno	return 1 - (2 * aSign);
6219	f6714d36	Aurelien Jarno	}
6220	f6714d36	Aurelien Jarno	} else {
6221	f6714d36	Aurelien Jarno	if (a.low == b.low && a.high == b.high) {
6222	f6714d36	Aurelien Jarno	return float_relation_equal;
6223	f6714d36	Aurelien Jarno	} else {
6224	f6714d36	Aurelien Jarno	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6225	f6714d36	Aurelien Jarno	}
6226	f6714d36	Aurelien Jarno	}
6227	f6714d36	Aurelien Jarno	}
6228	f6714d36	Aurelien Jarno
6229	f6714d36	Aurelien Jarno	int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
6230	f6714d36	Aurelien Jarno	{
6231	f6714d36	Aurelien Jarno	return floatx80_compare_internal(a, b, 0 STATUS_VAR);
6232	f6714d36	Aurelien Jarno	}
6233	f6714d36	Aurelien Jarno
6234	f6714d36	Aurelien Jarno	int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
6235	f6714d36	Aurelien Jarno	{
6236	f6714d36	Aurelien Jarno	return floatx80_compare_internal(a, b, 1 STATUS_VAR);
6237	f6714d36	Aurelien Jarno	}
6238	f6714d36	Aurelien Jarno
6239	1f587329	blueswir1	INLINE int float128_compare_internal( float128 a, float128 b,
6240	1f587329	blueswir1	int is_quiet STATUS_PARAM )
6241	1f587329	blueswir1	{
6242	1f587329	blueswir1	flag aSign, bSign;
6243	1f587329	blueswir1
6244	1f587329	blueswir1	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6245	1f587329	blueswir1	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
6246	1f587329	blueswir1	( ( extractFloat128Exp( b ) == 0x7fff ) &&
6247	1f587329	blueswir1	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
6248	1f587329	blueswir1	if (!is_quiet \|\|
6249	1f587329	blueswir1	float128_is_signaling_nan( a ) \|\|
6250	1f587329	blueswir1	float128_is_signaling_nan( b ) ) {
6251	1f587329	blueswir1	float_raise( float_flag_invalid STATUS_VAR);
6252	1f587329	blueswir1	}
6253	1f587329	blueswir1	return float_relation_unordered;
6254	1f587329	blueswir1	}
6255	1f587329	blueswir1	aSign = extractFloat128Sign( a );
6256	1f587329	blueswir1	bSign = extractFloat128Sign( b );
6257	1f587329	blueswir1	if ( aSign != bSign ) {
6258	1f587329	blueswir1	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
6259	1f587329	blueswir1	/* zero case */
6260	1f587329	blueswir1	return float_relation_equal;
6261	1f587329	blueswir1	} else {
6262	1f587329	blueswir1	return 1 - (2 * aSign);
6263	1f587329	blueswir1	}
6264	1f587329	blueswir1	} else {
6265	1f587329	blueswir1	if (a.low == b.low && a.high == b.high) {
6266	1f587329	blueswir1	return float_relation_equal;
6267	1f587329	blueswir1	} else {
6268	1f587329	blueswir1	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6269	1f587329	blueswir1	}
6270	1f587329	blueswir1	}
6271	1f587329	blueswir1	}
6272	1f587329	blueswir1
6273	1f587329	blueswir1	int float128_compare( float128 a, float128 b STATUS_PARAM )
6274	1f587329	blueswir1	{
6275	1f587329	blueswir1	return float128_compare_internal(a, b, 0 STATUS_VAR);
6276	1f587329	blueswir1	}
6277	1f587329	blueswir1
6278	1f587329	blueswir1	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
6279	1f587329	blueswir1	{
6280	1f587329	blueswir1	return float128_compare_internal(a, b, 1 STATUS_VAR);
6281	1f587329	blueswir1	}
6282	1f587329	blueswir1
6283	274f1b04	Peter Maydell	/* min() and max() functions. These can't be implemented as
6284	274f1b04	Peter Maydell	* 'compare and pick one input' because that would mishandle
6285	274f1b04	Peter Maydell	* NaNs and +0 vs -0.
6286	274f1b04	Peter Maydell	*/
6287	274f1b04	Peter Maydell	#define MINMAX(s, nan_exp) \
6288	274f1b04	Peter Maydell	INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
6289	274f1b04	Peter Maydell	int ismin STATUS_PARAM ) \
6290	274f1b04	Peter Maydell	{ \
6291	274f1b04	Peter Maydell	flag aSign, bSign; \
6292	274f1b04	Peter Maydell	uint ## s ## _t av, bv; \
6293	274f1b04	Peter Maydell	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6294	274f1b04	Peter Maydell	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
6295	274f1b04	Peter Maydell	if (float ## s ## _is_any_nan(a) \|\| \
6296	274f1b04	Peter Maydell	float ## s ## _is_any_nan(b)) { \
6297	274f1b04	Peter Maydell	return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
6298	274f1b04	Peter Maydell	} \
6299	274f1b04	Peter Maydell	aSign = extractFloat ## s ## Sign(a); \
6300	274f1b04	Peter Maydell	bSign = extractFloat ## s ## Sign(b); \
6301	274f1b04	Peter Maydell	av = float ## s ## _val(a); \
6302	274f1b04	Peter Maydell	bv = float ## s ## _val(b); \
6303	274f1b04	Peter Maydell	if (aSign != bSign) { \
6304	274f1b04	Peter Maydell	if (ismin) { \
6305	274f1b04	Peter Maydell	return aSign ? a : b; \
6306	274f1b04	Peter Maydell	} else { \
6307	274f1b04	Peter Maydell	return aSign ? b : a; \
6308	274f1b04	Peter Maydell	} \
6309	274f1b04	Peter Maydell	} else { \
6310	274f1b04	Peter Maydell	if (ismin) { \
6311	274f1b04	Peter Maydell	return (aSign ^ (av < bv)) ? a : b; \
6312	274f1b04	Peter Maydell	} else { \
6313	274f1b04	Peter Maydell	return (aSign ^ (av < bv)) ? b : a; \
6314	274f1b04	Peter Maydell	} \
6315	274f1b04	Peter Maydell	} \
6316	274f1b04	Peter Maydell	} \
6317	274f1b04	Peter Maydell	\
6318	274f1b04	Peter Maydell	float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
6319	274f1b04	Peter Maydell	{ \
6320	274f1b04	Peter Maydell	return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
6321	274f1b04	Peter Maydell	} \
6322	274f1b04	Peter Maydell	\
6323	274f1b04	Peter Maydell	float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
6324	274f1b04	Peter Maydell	{ \
6325	274f1b04	Peter Maydell	return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
6326	274f1b04	Peter Maydell	}
6327	274f1b04	Peter Maydell
6328	274f1b04	Peter Maydell	MINMAX(32, 0xff)
6329	274f1b04	Peter Maydell	MINMAX(64, 0x7ff)
6330	274f1b04	Peter Maydell
6331	274f1b04	Peter Maydell
6332	9ee6e8bb	pbrook	/* Multiply A by 2 raised to the power N. */
6333	9ee6e8bb	pbrook	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6334	9ee6e8bb	pbrook	{
6335	9ee6e8bb	pbrook	flag aSign;
6336	326b9e98	Aurelien Jarno	int16_t aExp;
6337	bb98fe42	Andreas Färber	uint32_t aSig;
6338	9ee6e8bb	pbrook
6339	37d18660	Peter Maydell	a = float32_squash_input_denormal(a STATUS_VAR);
6340	9ee6e8bb	pbrook	aSig = extractFloat32Frac( a );
6341	9ee6e8bb	pbrook	aExp = extractFloat32Exp( a );
6342	9ee6e8bb	pbrook	aSign = extractFloat32Sign( a );
6343	9ee6e8bb	pbrook
6344	9ee6e8bb	pbrook	if ( aExp == 0xFF ) {
6345	326b9e98	Aurelien Jarno	if ( aSig ) {
6346	326b9e98	Aurelien Jarno	return propagateFloat32NaN( a, a STATUS_VAR );
6347	326b9e98	Aurelien Jarno	}
6348	9ee6e8bb	pbrook	return a;
6349	9ee6e8bb	pbrook	}
6350	69397542	pbrook	if ( aExp != 0 )
6351	69397542	pbrook	aSig \|= 0x00800000;
6352	69397542	pbrook	else if ( aSig == 0 )
6353	69397542	pbrook	return a;
6354	69397542	pbrook
6355	326b9e98	Aurelien Jarno	if (n > 0x200) {
6356	326b9e98	Aurelien Jarno	n = 0x200;
6357	326b9e98	Aurelien Jarno	} else if (n < -0x200) {
6358	326b9e98	Aurelien Jarno	n = -0x200;
6359	326b9e98	Aurelien Jarno	}
6360	326b9e98	Aurelien Jarno
6361	69397542	pbrook	aExp += n - 1;
6362	69397542	pbrook	aSig <<= 7;
6363	69397542	pbrook	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
6364	9ee6e8bb	pbrook	}
6365	9ee6e8bb	pbrook
6366	9ee6e8bb	pbrook	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
6367	9ee6e8bb	pbrook	{
6368	9ee6e8bb	pbrook	flag aSign;
6369	326b9e98	Aurelien Jarno	int16_t aExp;
6370	bb98fe42	Andreas Färber	uint64_t aSig;
6371	9ee6e8bb	pbrook
6372	37d18660	Peter Maydell	a = float64_squash_input_denormal(a STATUS_VAR);
6373	9ee6e8bb	pbrook	aSig = extractFloat64Frac( a );
6374	9ee6e8bb	pbrook	aExp = extractFloat64Exp( a );
6375	9ee6e8bb	pbrook	aSign = extractFloat64Sign( a );
6376	9ee6e8bb	pbrook
6377	9ee6e8bb	pbrook	if ( aExp == 0x7FF ) {
6378	326b9e98	Aurelien Jarno	if ( aSig ) {
6379	326b9e98	Aurelien Jarno	return propagateFloat64NaN( a, a STATUS_VAR );
6380	326b9e98	Aurelien Jarno	}
6381	9ee6e8bb	pbrook	return a;
6382	9ee6e8bb	pbrook	}
6383	69397542	pbrook	if ( aExp != 0 )
6384	69397542	pbrook	aSig \|= LIT64( 0x0010000000000000 );
6385	69397542	pbrook	else if ( aSig == 0 )
6386	69397542	pbrook	return a;
6387	69397542	pbrook
6388	326b9e98	Aurelien Jarno	if (n > 0x1000) {
6389	326b9e98	Aurelien Jarno	n = 0x1000;
6390	326b9e98	Aurelien Jarno	} else if (n < -0x1000) {
6391	326b9e98	Aurelien Jarno	n = -0x1000;
6392	326b9e98	Aurelien Jarno	}
6393	326b9e98	Aurelien Jarno
6394	69397542	pbrook	aExp += n - 1;
6395	69397542	pbrook	aSig <<= 10;
6396	69397542	pbrook	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
6397	9ee6e8bb	pbrook	}
6398	9ee6e8bb	pbrook
6399	9ee6e8bb	pbrook	#ifdef FLOATX80
6400	9ee6e8bb	pbrook	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
6401	9ee6e8bb	pbrook	{
6402	9ee6e8bb	pbrook	flag aSign;
6403	326b9e98	Aurelien Jarno	int32_t aExp;
6404	bb98fe42	Andreas Färber	uint64_t aSig;
6405	9ee6e8bb	pbrook
6406	9ee6e8bb	pbrook	aSig = extractFloatx80Frac( a );
6407	9ee6e8bb	pbrook	aExp = extractFloatx80Exp( a );
6408	9ee6e8bb	pbrook	aSign = extractFloatx80Sign( a );
6409	9ee6e8bb	pbrook
6410	326b9e98	Aurelien Jarno	if ( aExp == 0x7FFF ) {
6411	326b9e98	Aurelien Jarno	if ( aSig<<1 ) {
6412	326b9e98	Aurelien Jarno	return propagateFloatx80NaN( a, a STATUS_VAR );
6413	326b9e98	Aurelien Jarno	}
6414	9ee6e8bb	pbrook	return a;
6415	9ee6e8bb	pbrook	}
6416	326b9e98	Aurelien Jarno
6417	69397542	pbrook	if (aExp == 0 && aSig == 0)
6418	69397542	pbrook	return a;
6419	69397542	pbrook
6420	326b9e98	Aurelien Jarno	if (n > 0x10000) {
6421	326b9e98	Aurelien Jarno	n = 0x10000;
6422	326b9e98	Aurelien Jarno	} else if (n < -0x10000) {
6423	326b9e98	Aurelien Jarno	n = -0x10000;
6424	326b9e98	Aurelien Jarno	}
6425	326b9e98	Aurelien Jarno
6426	9ee6e8bb	pbrook	aExp += n;
6427	69397542	pbrook	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
6428	69397542	pbrook	aSign, aExp, aSig, 0 STATUS_VAR );
6429	9ee6e8bb	pbrook	}
6430	9ee6e8bb	pbrook	#endif
6431	9ee6e8bb	pbrook
6432	9ee6e8bb	pbrook	#ifdef FLOAT128
6433	9ee6e8bb	pbrook	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
6434	9ee6e8bb	pbrook	{
6435	9ee6e8bb	pbrook	flag aSign;
6436	326b9e98	Aurelien Jarno	int32_t aExp;
6437	bb98fe42	Andreas Färber	uint64_t aSig0, aSig1;
6438	9ee6e8bb	pbrook
6439	9ee6e8bb	pbrook	aSig1 = extractFloat128Frac1( a );
6440	9ee6e8bb	pbrook	aSig0 = extractFloat128Frac0( a );
6441	9ee6e8bb	pbrook	aExp = extractFloat128Exp( a );
6442	9ee6e8bb	pbrook	aSign = extractFloat128Sign( a );
6443	9ee6e8bb	pbrook	if ( aExp == 0x7FFF ) {
6444	326b9e98	Aurelien Jarno	if ( aSig0 \| aSig1 ) {
6445	326b9e98	Aurelien Jarno	return propagateFloat128NaN( a, a STATUS_VAR );
6446	326b9e98	Aurelien Jarno	}
6447	9ee6e8bb	pbrook	return a;
6448	9ee6e8bb	pbrook	}
6449	69397542	pbrook	if ( aExp != 0 )
6450	69397542	pbrook	aSig0 \|= LIT64( 0x0001000000000000 );
6451	69397542	pbrook	else if ( aSig0 == 0 && aSig1 == 0 )
6452	69397542	pbrook	return a;
6453	69397542	pbrook
6454	326b9e98	Aurelien Jarno	if (n > 0x10000) {
6455	326b9e98	Aurelien Jarno	n = 0x10000;
6456	326b9e98	Aurelien Jarno	} else if (n < -0x10000) {
6457	326b9e98	Aurelien Jarno	n = -0x10000;
6458	326b9e98	Aurelien Jarno	}
6459	326b9e98	Aurelien Jarno
6460	69397542	pbrook	aExp += n - 1;
6461	69397542	pbrook	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
6462	69397542	pbrook	STATUS_VAR );
6463	9ee6e8bb	pbrook
6464	9ee6e8bb	pbrook	}
6465	9ee6e8bb	pbrook	#endif

Archipelago » qemu

root / fpu / softfloat.c @ a74cdab4