Statistics
| Branch: | Revision:

root / fpu / softfloat-macros.h @ 8d725fac

History | View | Annotate | Download (23.9 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support macros
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
10 158142c2 bellard
Arithmetic Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 158142c2 bellard
/*----------------------------------------------------------------------------
39 158142c2 bellard
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
40 158142c2 bellard
| bits are shifted off, they are ``jammed'' into the least significant bit of
41 158142c2 bellard
| the result by setting the least significant bit to 1.  The value of `count'
42 158142c2 bellard
| can be arbitrarily large; in particular, if `count' is greater than 32, the
43 158142c2 bellard
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44 158142c2 bellard
| The result is stored in the location pointed to by `zPtr'.
45 158142c2 bellard
*----------------------------------------------------------------------------*/
46 158142c2 bellard
47 158142c2 bellard
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
48 158142c2 bellard
{
49 158142c2 bellard
    bits32 z;
50 158142c2 bellard
51 158142c2 bellard
    if ( count == 0 ) {
52 158142c2 bellard
        z = a;
53 158142c2 bellard
    }
54 158142c2 bellard
    else if ( count < 32 ) {
55 158142c2 bellard
        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
56 158142c2 bellard
    }
57 158142c2 bellard
    else {
58 158142c2 bellard
        z = ( a != 0 );
59 158142c2 bellard
    }
60 158142c2 bellard
    *zPtr = z;
61 158142c2 bellard
62 158142c2 bellard
}
63 158142c2 bellard
64 158142c2 bellard
/*----------------------------------------------------------------------------
65 158142c2 bellard
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
66 158142c2 bellard
| bits are shifted off, they are ``jammed'' into the least significant bit of
67 158142c2 bellard
| the result by setting the least significant bit to 1.  The value of `count'
68 158142c2 bellard
| can be arbitrarily large; in particular, if `count' is greater than 64, the
69 158142c2 bellard
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70 158142c2 bellard
| The result is stored in the location pointed to by `zPtr'.
71 158142c2 bellard
*----------------------------------------------------------------------------*/
72 158142c2 bellard
73 158142c2 bellard
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
74 158142c2 bellard
{
75 158142c2 bellard
    bits64 z;
76 158142c2 bellard
77 158142c2 bellard
    if ( count == 0 ) {
78 158142c2 bellard
        z = a;
79 158142c2 bellard
    }
80 158142c2 bellard
    else if ( count < 64 ) {
81 158142c2 bellard
        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
82 158142c2 bellard
    }
83 158142c2 bellard
    else {
84 158142c2 bellard
        z = ( a != 0 );
85 158142c2 bellard
    }
86 158142c2 bellard
    *zPtr = z;
87 158142c2 bellard
88 158142c2 bellard
}
89 158142c2 bellard
90 158142c2 bellard
/*----------------------------------------------------------------------------
91 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92 158142c2 bellard
| _plus_ the number of bits given in `count'.  The shifted result is at most
93 158142c2 bellard
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
94 158142c2 bellard
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
95 158142c2 bellard
| shifted off is the most-significant bit of the extra result, and the other
96 158142c2 bellard
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
97 158142c2 bellard
| bits shifted off were all zero.  This extra result is stored in the location
98 158142c2 bellard
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
99 158142c2 bellard
|     (This routine makes more sense if `a0' and `a1' are considered to form
100 158142c2 bellard
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
101 158142c2 bellard
| point value is shifted right by the number of bits given in `count', and
102 158142c2 bellard
| the integer part of the result is returned at the location pointed to by
103 158142c2 bellard
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
104 158142c2 bellard
| described above, and is returned at the location pointed to by `z1Ptr'.)
105 158142c2 bellard
*----------------------------------------------------------------------------*/
106 158142c2 bellard
107 158142c2 bellard
INLINE void
108 158142c2 bellard
 shift64ExtraRightJamming(
109 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
110 158142c2 bellard
{
111 158142c2 bellard
    bits64 z0, z1;
112 158142c2 bellard
    int8 negCount = ( - count ) & 63;
113 158142c2 bellard
114 158142c2 bellard
    if ( count == 0 ) {
115 158142c2 bellard
        z1 = a1;
116 158142c2 bellard
        z0 = a0;
117 158142c2 bellard
    }
118 158142c2 bellard
    else if ( count < 64 ) {
119 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1 != 0 );
120 158142c2 bellard
        z0 = a0>>count;
121 158142c2 bellard
    }
122 158142c2 bellard
    else {
123 158142c2 bellard
        if ( count == 64 ) {
124 158142c2 bellard
            z1 = a0 | ( a1 != 0 );
125 158142c2 bellard
        }
126 158142c2 bellard
        else {
127 158142c2 bellard
            z1 = ( ( a0 | a1 ) != 0 );
128 158142c2 bellard
        }
129 158142c2 bellard
        z0 = 0;
130 158142c2 bellard
    }
131 158142c2 bellard
    *z1Ptr = z1;
132 158142c2 bellard
    *z0Ptr = z0;
133 158142c2 bellard
134 158142c2 bellard
}
135 158142c2 bellard
136 158142c2 bellard
/*----------------------------------------------------------------------------
137 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138 158142c2 bellard
| number of bits given in `count'.  Any bits shifted off are lost.  The value
139 158142c2 bellard
| of `count' can be arbitrarily large; in particular, if `count' is greater
140 158142c2 bellard
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
141 158142c2 bellard
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142 158142c2 bellard
*----------------------------------------------------------------------------*/
143 158142c2 bellard
144 158142c2 bellard
INLINE void
145 158142c2 bellard
 shift128Right(
146 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
147 158142c2 bellard
{
148 158142c2 bellard
    bits64 z0, z1;
149 158142c2 bellard
    int8 negCount = ( - count ) & 63;
150 158142c2 bellard
151 158142c2 bellard
    if ( count == 0 ) {
152 158142c2 bellard
        z1 = a1;
153 158142c2 bellard
        z0 = a0;
154 158142c2 bellard
    }
155 158142c2 bellard
    else if ( count < 64 ) {
156 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1>>count );
157 158142c2 bellard
        z0 = a0>>count;
158 158142c2 bellard
    }
159 158142c2 bellard
    else {
160 158142c2 bellard
        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
161 158142c2 bellard
        z0 = 0;
162 158142c2 bellard
    }
163 158142c2 bellard
    *z1Ptr = z1;
164 158142c2 bellard
    *z0Ptr = z0;
165 158142c2 bellard
166 158142c2 bellard
}
167 158142c2 bellard
168 158142c2 bellard
/*----------------------------------------------------------------------------
169 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170 158142c2 bellard
| number of bits given in `count'.  If any nonzero bits are shifted off, they
171 158142c2 bellard
| are ``jammed'' into the least significant bit of the result by setting the
172 158142c2 bellard
| least significant bit to 1.  The value of `count' can be arbitrarily large;
173 158142c2 bellard
| in particular, if `count' is greater than 128, the result will be either
174 158142c2 bellard
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175 158142c2 bellard
| nonzero.  The result is broken into two 64-bit pieces which are stored at
176 158142c2 bellard
| the locations pointed to by `z0Ptr' and `z1Ptr'.
177 158142c2 bellard
*----------------------------------------------------------------------------*/
178 158142c2 bellard
179 158142c2 bellard
INLINE void
180 158142c2 bellard
 shift128RightJamming(
181 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182 158142c2 bellard
{
183 158142c2 bellard
    bits64 z0, z1;
184 158142c2 bellard
    int8 negCount = ( - count ) & 63;
185 158142c2 bellard
186 158142c2 bellard
    if ( count == 0 ) {
187 158142c2 bellard
        z1 = a1;
188 158142c2 bellard
        z0 = a0;
189 158142c2 bellard
    }
190 158142c2 bellard
    else if ( count < 64 ) {
191 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192 158142c2 bellard
        z0 = a0>>count;
193 158142c2 bellard
    }
194 158142c2 bellard
    else {
195 158142c2 bellard
        if ( count == 64 ) {
196 158142c2 bellard
            z1 = a0 | ( a1 != 0 );
197 158142c2 bellard
        }
198 158142c2 bellard
        else if ( count < 128 ) {
199 158142c2 bellard
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200 158142c2 bellard
        }
201 158142c2 bellard
        else {
202 158142c2 bellard
            z1 = ( ( a0 | a1 ) != 0 );
203 158142c2 bellard
        }
204 158142c2 bellard
        z0 = 0;
205 158142c2 bellard
    }
206 158142c2 bellard
    *z1Ptr = z1;
207 158142c2 bellard
    *z0Ptr = z0;
208 158142c2 bellard
209 158142c2 bellard
}
210 158142c2 bellard
211 158142c2 bellard
/*----------------------------------------------------------------------------
212 158142c2 bellard
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
213 158142c2 bellard
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
214 158142c2 bellard
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
215 158142c2 bellard
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
216 158142c2 bellard
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
217 158142c2 bellard
| the most-significant bit of the extra result, and the other 63 bits of the
218 158142c2 bellard
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
219 158142c2 bellard
| were all zero.  This extra result is stored in the location pointed to by
220 158142c2 bellard
| `z2Ptr'.  The value of `count' can be arbitrarily large.
221 158142c2 bellard
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
222 158142c2 bellard
| to form a fixed-point value with binary point between `a1' and `a2'.  This
223 158142c2 bellard
| fixed-point value is shifted right by the number of bits given in `count',
224 158142c2 bellard
| and the integer part of the result is returned at the locations pointed to
225 158142c2 bellard
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
226 158142c2 bellard
| corrupted as described above, and is returned at the location pointed to by
227 158142c2 bellard
| `z2Ptr'.)
228 158142c2 bellard
*----------------------------------------------------------------------------*/
229 158142c2 bellard
230 158142c2 bellard
INLINE void
231 158142c2 bellard
 shift128ExtraRightJamming(
232 158142c2 bellard
     bits64 a0,
233 158142c2 bellard
     bits64 a1,
234 158142c2 bellard
     bits64 a2,
235 158142c2 bellard
     int16 count,
236 158142c2 bellard
     bits64 *z0Ptr,
237 158142c2 bellard
     bits64 *z1Ptr,
238 158142c2 bellard
     bits64 *z2Ptr
239 158142c2 bellard
 )
240 158142c2 bellard
{
241 158142c2 bellard
    bits64 z0, z1, z2;
242 158142c2 bellard
    int8 negCount = ( - count ) & 63;
243 158142c2 bellard
244 158142c2 bellard
    if ( count == 0 ) {
245 158142c2 bellard
        z2 = a2;
246 158142c2 bellard
        z1 = a1;
247 158142c2 bellard
        z0 = a0;
248 158142c2 bellard
    }
249 158142c2 bellard
    else {
250 158142c2 bellard
        if ( count < 64 ) {
251 158142c2 bellard
            z2 = a1<<negCount;
252 158142c2 bellard
            z1 = ( a0<<negCount ) | ( a1>>count );
253 158142c2 bellard
            z0 = a0>>count;
254 158142c2 bellard
        }
255 158142c2 bellard
        else {
256 158142c2 bellard
            if ( count == 64 ) {
257 158142c2 bellard
                z2 = a1;
258 158142c2 bellard
                z1 = a0;
259 158142c2 bellard
            }
260 158142c2 bellard
            else {
261 158142c2 bellard
                a2 |= a1;
262 158142c2 bellard
                if ( count < 128 ) {
263 158142c2 bellard
                    z2 = a0<<negCount;
264 158142c2 bellard
                    z1 = a0>>( count & 63 );
265 158142c2 bellard
                }
266 158142c2 bellard
                else {
267 158142c2 bellard
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
268 158142c2 bellard
                    z1 = 0;
269 158142c2 bellard
                }
270 158142c2 bellard
            }
271 158142c2 bellard
            z0 = 0;
272 158142c2 bellard
        }
273 158142c2 bellard
        z2 |= ( a2 != 0 );
274 158142c2 bellard
    }
275 158142c2 bellard
    *z2Ptr = z2;
276 158142c2 bellard
    *z1Ptr = z1;
277 158142c2 bellard
    *z0Ptr = z0;
278 158142c2 bellard
279 158142c2 bellard
}
280 158142c2 bellard
281 158142c2 bellard
/*----------------------------------------------------------------------------
282 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283 158142c2 bellard
| number of bits given in `count'.  Any bits shifted off are lost.  The value
284 158142c2 bellard
| of `count' must be less than 64.  The result is broken into two 64-bit
285 158142c2 bellard
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286 158142c2 bellard
*----------------------------------------------------------------------------*/
287 158142c2 bellard
288 158142c2 bellard
INLINE void
289 158142c2 bellard
 shortShift128Left(
290 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291 158142c2 bellard
{
292 158142c2 bellard
293 158142c2 bellard
    *z1Ptr = a1<<count;
294 158142c2 bellard
    *z0Ptr =
295 158142c2 bellard
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296 158142c2 bellard
297 158142c2 bellard
}
298 158142c2 bellard
299 158142c2 bellard
/*----------------------------------------------------------------------------
300 158142c2 bellard
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
301 158142c2 bellard
| by the number of bits given in `count'.  Any bits shifted off are lost.
302 158142c2 bellard
| The value of `count' must be less than 64.  The result is broken into three
303 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
304 158142c2 bellard
| `z1Ptr', and `z2Ptr'.
305 158142c2 bellard
*----------------------------------------------------------------------------*/
306 158142c2 bellard
307 158142c2 bellard
INLINE void
308 158142c2 bellard
 shortShift192Left(
309 158142c2 bellard
     bits64 a0,
310 158142c2 bellard
     bits64 a1,
311 158142c2 bellard
     bits64 a2,
312 158142c2 bellard
     int16 count,
313 158142c2 bellard
     bits64 *z0Ptr,
314 158142c2 bellard
     bits64 *z1Ptr,
315 158142c2 bellard
     bits64 *z2Ptr
316 158142c2 bellard
 )
317 158142c2 bellard
{
318 158142c2 bellard
    bits64 z0, z1, z2;
319 158142c2 bellard
    int8 negCount;
320 158142c2 bellard
321 158142c2 bellard
    z2 = a2<<count;
322 158142c2 bellard
    z1 = a1<<count;
323 158142c2 bellard
    z0 = a0<<count;
324 158142c2 bellard
    if ( 0 < count ) {
325 158142c2 bellard
        negCount = ( ( - count ) & 63 );
326 158142c2 bellard
        z1 |= a2>>negCount;
327 158142c2 bellard
        z0 |= a1>>negCount;
328 158142c2 bellard
    }
329 158142c2 bellard
    *z2Ptr = z2;
330 158142c2 bellard
    *z1Ptr = z1;
331 158142c2 bellard
    *z0Ptr = z0;
332 158142c2 bellard
333 158142c2 bellard
}
334 158142c2 bellard
335 158142c2 bellard
/*----------------------------------------------------------------------------
336 158142c2 bellard
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
337 158142c2 bellard
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
338 158142c2 bellard
| any carry out is lost.  The result is broken into two 64-bit pieces which
339 158142c2 bellard
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
340 158142c2 bellard
*----------------------------------------------------------------------------*/
341 158142c2 bellard
342 158142c2 bellard
INLINE void
343 158142c2 bellard
 add128(
344 158142c2 bellard
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
345 158142c2 bellard
{
346 158142c2 bellard
    bits64 z1;
347 158142c2 bellard
348 158142c2 bellard
    z1 = a1 + b1;
349 158142c2 bellard
    *z1Ptr = z1;
350 158142c2 bellard
    *z0Ptr = a0 + b0 + ( z1 < a1 );
351 158142c2 bellard
352 158142c2 bellard
}
353 158142c2 bellard
354 158142c2 bellard
/*----------------------------------------------------------------------------
355 158142c2 bellard
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
356 158142c2 bellard
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
357 158142c2 bellard
| modulo 2^192, so any carry out is lost.  The result is broken into three
358 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
359 158142c2 bellard
| `z1Ptr', and `z2Ptr'.
360 158142c2 bellard
*----------------------------------------------------------------------------*/
361 158142c2 bellard
362 158142c2 bellard
INLINE void
363 158142c2 bellard
 add192(
364 158142c2 bellard
     bits64 a0,
365 158142c2 bellard
     bits64 a1,
366 158142c2 bellard
     bits64 a2,
367 158142c2 bellard
     bits64 b0,
368 158142c2 bellard
     bits64 b1,
369 158142c2 bellard
     bits64 b2,
370 158142c2 bellard
     bits64 *z0Ptr,
371 158142c2 bellard
     bits64 *z1Ptr,
372 158142c2 bellard
     bits64 *z2Ptr
373 158142c2 bellard
 )
374 158142c2 bellard
{
375 158142c2 bellard
    bits64 z0, z1, z2;
376 158142c2 bellard
    int8 carry0, carry1;
377 158142c2 bellard
378 158142c2 bellard
    z2 = a2 + b2;
379 158142c2 bellard
    carry1 = ( z2 < a2 );
380 158142c2 bellard
    z1 = a1 + b1;
381 158142c2 bellard
    carry0 = ( z1 < a1 );
382 158142c2 bellard
    z0 = a0 + b0;
383 158142c2 bellard
    z1 += carry1;
384 158142c2 bellard
    z0 += ( z1 < carry1 );
385 158142c2 bellard
    z0 += carry0;
386 158142c2 bellard
    *z2Ptr = z2;
387 158142c2 bellard
    *z1Ptr = z1;
388 158142c2 bellard
    *z0Ptr = z0;
389 158142c2 bellard
390 158142c2 bellard
}
391 158142c2 bellard
392 158142c2 bellard
/*----------------------------------------------------------------------------
393 158142c2 bellard
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
394 158142c2 bellard
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
395 158142c2 bellard
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
396 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
397 158142c2 bellard
| `z1Ptr'.
398 158142c2 bellard
*----------------------------------------------------------------------------*/
399 158142c2 bellard
400 158142c2 bellard
INLINE void
401 158142c2 bellard
 sub128(
402 158142c2 bellard
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
403 158142c2 bellard
{
404 158142c2 bellard
405 158142c2 bellard
    *z1Ptr = a1 - b1;
406 158142c2 bellard
    *z0Ptr = a0 - b0 - ( a1 < b1 );
407 158142c2 bellard
408 158142c2 bellard
}
409 158142c2 bellard
410 158142c2 bellard
/*----------------------------------------------------------------------------
411 158142c2 bellard
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
412 158142c2 bellard
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
413 158142c2 bellard
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
414 158142c2 bellard
| result is broken into three 64-bit pieces which are stored at the locations
415 158142c2 bellard
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
416 158142c2 bellard
*----------------------------------------------------------------------------*/
417 158142c2 bellard
418 158142c2 bellard
INLINE void
419 158142c2 bellard
 sub192(
420 158142c2 bellard
     bits64 a0,
421 158142c2 bellard
     bits64 a1,
422 158142c2 bellard
     bits64 a2,
423 158142c2 bellard
     bits64 b0,
424 158142c2 bellard
     bits64 b1,
425 158142c2 bellard
     bits64 b2,
426 158142c2 bellard
     bits64 *z0Ptr,
427 158142c2 bellard
     bits64 *z1Ptr,
428 158142c2 bellard
     bits64 *z2Ptr
429 158142c2 bellard
 )
430 158142c2 bellard
{
431 158142c2 bellard
    bits64 z0, z1, z2;
432 158142c2 bellard
    int8 borrow0, borrow1;
433 158142c2 bellard
434 158142c2 bellard
    z2 = a2 - b2;
435 158142c2 bellard
    borrow1 = ( a2 < b2 );
436 158142c2 bellard
    z1 = a1 - b1;
437 158142c2 bellard
    borrow0 = ( a1 < b1 );
438 158142c2 bellard
    z0 = a0 - b0;
439 158142c2 bellard
    z0 -= ( z1 < borrow1 );
440 158142c2 bellard
    z1 -= borrow1;
441 158142c2 bellard
    z0 -= borrow0;
442 158142c2 bellard
    *z2Ptr = z2;
443 158142c2 bellard
    *z1Ptr = z1;
444 158142c2 bellard
    *z0Ptr = z0;
445 158142c2 bellard
446 158142c2 bellard
}
447 158142c2 bellard
448 158142c2 bellard
/*----------------------------------------------------------------------------
449 158142c2 bellard
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
450 158142c2 bellard
| into two 64-bit pieces which are stored at the locations pointed to by
451 158142c2 bellard
| `z0Ptr' and `z1Ptr'.
452 158142c2 bellard
*----------------------------------------------------------------------------*/
453 158142c2 bellard
454 158142c2 bellard
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
455 158142c2 bellard
{
456 158142c2 bellard
    bits32 aHigh, aLow, bHigh, bLow;
457 158142c2 bellard
    bits64 z0, zMiddleA, zMiddleB, z1;
458 158142c2 bellard
459 158142c2 bellard
    aLow = a;
460 158142c2 bellard
    aHigh = a>>32;
461 158142c2 bellard
    bLow = b;
462 158142c2 bellard
    bHigh = b>>32;
463 158142c2 bellard
    z1 = ( (bits64) aLow ) * bLow;
464 158142c2 bellard
    zMiddleA = ( (bits64) aLow ) * bHigh;
465 158142c2 bellard
    zMiddleB = ( (bits64) aHigh ) * bLow;
466 158142c2 bellard
    z0 = ( (bits64) aHigh ) * bHigh;
467 158142c2 bellard
    zMiddleA += zMiddleB;
468 158142c2 bellard
    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
469 158142c2 bellard
    zMiddleA <<= 32;
470 158142c2 bellard
    z1 += zMiddleA;
471 158142c2 bellard
    z0 += ( z1 < zMiddleA );
472 158142c2 bellard
    *z1Ptr = z1;
473 158142c2 bellard
    *z0Ptr = z0;
474 158142c2 bellard
475 158142c2 bellard
}
476 158142c2 bellard
477 158142c2 bellard
/*----------------------------------------------------------------------------
478 158142c2 bellard
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
479 158142c2 bellard
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
480 158142c2 bellard
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
481 158142c2 bellard
| `z2Ptr'.
482 158142c2 bellard
*----------------------------------------------------------------------------*/
483 158142c2 bellard
484 158142c2 bellard
INLINE void
485 158142c2 bellard
 mul128By64To192(
486 158142c2 bellard
     bits64 a0,
487 158142c2 bellard
     bits64 a1,
488 158142c2 bellard
     bits64 b,
489 158142c2 bellard
     bits64 *z0Ptr,
490 158142c2 bellard
     bits64 *z1Ptr,
491 158142c2 bellard
     bits64 *z2Ptr
492 158142c2 bellard
 )
493 158142c2 bellard
{
494 158142c2 bellard
    bits64 z0, z1, z2, more1;
495 158142c2 bellard
496 158142c2 bellard
    mul64To128( a1, b, &z1, &z2 );
497 158142c2 bellard
    mul64To128( a0, b, &z0, &more1 );
498 158142c2 bellard
    add128( z0, more1, 0, z1, &z0, &z1 );
499 158142c2 bellard
    *z2Ptr = z2;
500 158142c2 bellard
    *z1Ptr = z1;
501 158142c2 bellard
    *z0Ptr = z0;
502 158142c2 bellard
503 158142c2 bellard
}
504 158142c2 bellard
505 158142c2 bellard
/*----------------------------------------------------------------------------
506 158142c2 bellard
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
507 158142c2 bellard
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
508 158142c2 bellard
| product.  The product is broken into four 64-bit pieces which are stored at
509 158142c2 bellard
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
510 158142c2 bellard
*----------------------------------------------------------------------------*/
511 158142c2 bellard
512 158142c2 bellard
INLINE void
513 158142c2 bellard
 mul128To256(
514 158142c2 bellard
     bits64 a0,
515 158142c2 bellard
     bits64 a1,
516 158142c2 bellard
     bits64 b0,
517 158142c2 bellard
     bits64 b1,
518 158142c2 bellard
     bits64 *z0Ptr,
519 158142c2 bellard
     bits64 *z1Ptr,
520 158142c2 bellard
     bits64 *z2Ptr,
521 158142c2 bellard
     bits64 *z3Ptr
522 158142c2 bellard
 )
523 158142c2 bellard
{
524 158142c2 bellard
    bits64 z0, z1, z2, z3;
525 158142c2 bellard
    bits64 more1, more2;
526 158142c2 bellard
527 158142c2 bellard
    mul64To128( a1, b1, &z2, &z3 );
528 158142c2 bellard
    mul64To128( a1, b0, &z1, &more2 );
529 158142c2 bellard
    add128( z1, more2, 0, z2, &z1, &z2 );
530 158142c2 bellard
    mul64To128( a0, b0, &z0, &more1 );
531 158142c2 bellard
    add128( z0, more1, 0, z1, &z0, &z1 );
532 158142c2 bellard
    mul64To128( a0, b1, &more1, &more2 );
533 158142c2 bellard
    add128( more1, more2, 0, z2, &more1, &z2 );
534 158142c2 bellard
    add128( z0, z1, 0, more1, &z0, &z1 );
535 158142c2 bellard
    *z3Ptr = z3;
536 158142c2 bellard
    *z2Ptr = z2;
537 158142c2 bellard
    *z1Ptr = z1;
538 158142c2 bellard
    *z0Ptr = z0;
539 158142c2 bellard
540 158142c2 bellard
}
541 158142c2 bellard
542 158142c2 bellard
/*----------------------------------------------------------------------------
543 158142c2 bellard
| Returns an approximation to the 64-bit integer quotient obtained by dividing
544 158142c2 bellard
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
545 158142c2 bellard
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
546 158142c2 bellard
| toward zero, the approximation returned lies between q and q + 2 inclusive.
547 158142c2 bellard
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
548 158142c2 bellard
| unsigned integer is returned.
549 158142c2 bellard
*----------------------------------------------------------------------------*/
550 158142c2 bellard
551 158142c2 bellard
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
552 158142c2 bellard
{
553 158142c2 bellard
    bits64 b0, b1;
554 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
555 158142c2 bellard
    bits64 z;
556 158142c2 bellard
557 158142c2 bellard
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
558 158142c2 bellard
    b0 = b>>32;
559 158142c2 bellard
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
560 158142c2 bellard
    mul64To128( b, z, &term0, &term1 );
561 158142c2 bellard
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
562 158142c2 bellard
    while ( ( (sbits64) rem0 ) < 0 ) {
563 158142c2 bellard
        z -= LIT64( 0x100000000 );
564 158142c2 bellard
        b1 = b<<32;
565 158142c2 bellard
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
566 158142c2 bellard
    }
567 158142c2 bellard
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
568 158142c2 bellard
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
569 158142c2 bellard
    return z;
570 158142c2 bellard
571 158142c2 bellard
}
572 158142c2 bellard
573 158142c2 bellard
/*----------------------------------------------------------------------------
574 158142c2 bellard
| Returns an approximation to the square root of the 32-bit significand given
575 158142c2 bellard
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
576 158142c2 bellard
| `aExp' (the least significant bit) is 1, the integer returned approximates
577 158142c2 bellard
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
578 158142c2 bellard
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
579 158142c2 bellard
| case, the approximation returned lies strictly within +/-2 of the exact
580 158142c2 bellard
| value.
581 158142c2 bellard
*----------------------------------------------------------------------------*/
582 158142c2 bellard
583 158142c2 bellard
static bits32 estimateSqrt32( int16 aExp, bits32 a )
584 158142c2 bellard
{
585 158142c2 bellard
    static const bits16 sqrtOddAdjustments[] = {
586 158142c2 bellard
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
587 158142c2 bellard
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
588 158142c2 bellard
    };
589 158142c2 bellard
    static const bits16 sqrtEvenAdjustments[] = {
590 158142c2 bellard
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
591 158142c2 bellard
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
592 158142c2 bellard
    };
593 158142c2 bellard
    int8 index;
594 158142c2 bellard
    bits32 z;
595 158142c2 bellard
596 158142c2 bellard
    index = ( a>>27 ) & 15;
597 158142c2 bellard
    if ( aExp & 1 ) {
598 3f4cb3d3 blueswir1
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
599 158142c2 bellard
        z = ( ( a / z )<<14 ) + ( z<<15 );
600 158142c2 bellard
        a >>= 1;
601 158142c2 bellard
    }
602 158142c2 bellard
    else {
603 3f4cb3d3 blueswir1
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
604 158142c2 bellard
        z = a / z + z;
605 158142c2 bellard
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
606 158142c2 bellard
        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
607 158142c2 bellard
    }
608 158142c2 bellard
    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
609 158142c2 bellard
610 158142c2 bellard
}
611 158142c2 bellard
612 158142c2 bellard
/*----------------------------------------------------------------------------
613 158142c2 bellard
| Returns the number of leading 0 bits before the most-significant 1 bit of
614 158142c2 bellard
| `a'.  If `a' is zero, 32 is returned.
615 158142c2 bellard
*----------------------------------------------------------------------------*/
616 158142c2 bellard
617 158142c2 bellard
static int8 countLeadingZeros32( bits32 a )
618 158142c2 bellard
{
619 158142c2 bellard
    static const int8 countLeadingZerosHigh[] = {
620 158142c2 bellard
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
621 158142c2 bellard
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
622 158142c2 bellard
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
623 158142c2 bellard
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
624 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
625 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
626 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
627 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
628 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
636 158142c2 bellard
    };
637 158142c2 bellard
    int8 shiftCount;
638 158142c2 bellard
639 158142c2 bellard
    shiftCount = 0;
640 158142c2 bellard
    if ( a < 0x10000 ) {
641 158142c2 bellard
        shiftCount += 16;
642 158142c2 bellard
        a <<= 16;
643 158142c2 bellard
    }
644 158142c2 bellard
    if ( a < 0x1000000 ) {
645 158142c2 bellard
        shiftCount += 8;
646 158142c2 bellard
        a <<= 8;
647 158142c2 bellard
    }
648 158142c2 bellard
    shiftCount += countLeadingZerosHigh[ a>>24 ];
649 158142c2 bellard
    return shiftCount;
650 158142c2 bellard
651 158142c2 bellard
}
652 158142c2 bellard
653 158142c2 bellard
/*----------------------------------------------------------------------------
654 158142c2 bellard
| Returns the number of leading 0 bits before the most-significant 1 bit of
655 158142c2 bellard
| `a'.  If `a' is zero, 64 is returned.
656 158142c2 bellard
*----------------------------------------------------------------------------*/
657 158142c2 bellard
658 158142c2 bellard
static int8 countLeadingZeros64( bits64 a )
659 158142c2 bellard
{
660 158142c2 bellard
    int8 shiftCount;
661 158142c2 bellard
662 158142c2 bellard
    shiftCount = 0;
663 158142c2 bellard
    if ( a < ( (bits64) 1 )<<32 ) {
664 158142c2 bellard
        shiftCount += 32;
665 158142c2 bellard
    }
666 158142c2 bellard
    else {
667 158142c2 bellard
        a >>= 32;
668 158142c2 bellard
    }
669 158142c2 bellard
    shiftCount += countLeadingZeros32( a );
670 158142c2 bellard
    return shiftCount;
671 158142c2 bellard
672 158142c2 bellard
}
673 158142c2 bellard
674 158142c2 bellard
/*----------------------------------------------------------------------------
675 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
676 158142c2 bellard
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
677 158142c2 bellard
| Otherwise, returns 0.
678 158142c2 bellard
*----------------------------------------------------------------------------*/
679 158142c2 bellard
680 158142c2 bellard
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
681 158142c2 bellard
{
682 158142c2 bellard
683 158142c2 bellard
    return ( a0 == b0 ) && ( a1 == b1 );
684 158142c2 bellard
685 158142c2 bellard
}
686 158142c2 bellard
687 158142c2 bellard
/*----------------------------------------------------------------------------
688 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
689 158142c2 bellard
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
690 158142c2 bellard
| Otherwise, returns 0.
691 158142c2 bellard
*----------------------------------------------------------------------------*/
692 158142c2 bellard
693 158142c2 bellard
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
694 158142c2 bellard
{
695 158142c2 bellard
696 158142c2 bellard
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
697 158142c2 bellard
698 158142c2 bellard
}
699 158142c2 bellard
700 158142c2 bellard
/*----------------------------------------------------------------------------
701 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
702 158142c2 bellard
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
703 158142c2 bellard
| returns 0.
704 158142c2 bellard
*----------------------------------------------------------------------------*/
705 158142c2 bellard
706 158142c2 bellard
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707 158142c2 bellard
{
708 158142c2 bellard
709 158142c2 bellard
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
710 158142c2 bellard
711 158142c2 bellard
}
712 158142c2 bellard
713 158142c2 bellard
/*----------------------------------------------------------------------------
714 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
715 158142c2 bellard
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
716 158142c2 bellard
| Otherwise, returns 0.
717 158142c2 bellard
*----------------------------------------------------------------------------*/
718 158142c2 bellard
719 158142c2 bellard
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
720 158142c2 bellard
{
721 158142c2 bellard
722 158142c2 bellard
    return ( a0 != b0 ) || ( a1 != b1 );
723 158142c2 bellard
724 158142c2 bellard
}