Statistics
| Branch: | Revision:

root / fpu / softfloat-macros.h @ 5fafdf24

History | View | Annotate | Download (23.8 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
5 158142c2 bellard
Arithmetic Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 158142c2 bellard
/*----------------------------------------------------------------------------
34 158142c2 bellard
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
35 158142c2 bellard
| bits are shifted off, they are ``jammed'' into the least significant bit of
36 158142c2 bellard
| the result by setting the least significant bit to 1.  The value of `count'
37 158142c2 bellard
| can be arbitrarily large; in particular, if `count' is greater than 32, the
38 158142c2 bellard
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39 158142c2 bellard
| The result is stored in the location pointed to by `zPtr'.
40 158142c2 bellard
*----------------------------------------------------------------------------*/
41 158142c2 bellard
42 158142c2 bellard
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43 158142c2 bellard
{
44 158142c2 bellard
    bits32 z;
45 158142c2 bellard
46 158142c2 bellard
    if ( count == 0 ) {
47 158142c2 bellard
        z = a;
48 158142c2 bellard
    }
49 158142c2 bellard
    else if ( count < 32 ) {
50 158142c2 bellard
        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
51 158142c2 bellard
    }
52 158142c2 bellard
    else {
53 158142c2 bellard
        z = ( a != 0 );
54 158142c2 bellard
    }
55 158142c2 bellard
    *zPtr = z;
56 158142c2 bellard
57 158142c2 bellard
}
58 158142c2 bellard
59 158142c2 bellard
/*----------------------------------------------------------------------------
60 158142c2 bellard
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
61 158142c2 bellard
| bits are shifted off, they are ``jammed'' into the least significant bit of
62 158142c2 bellard
| the result by setting the least significant bit to 1.  The value of `count'
63 158142c2 bellard
| can be arbitrarily large; in particular, if `count' is greater than 64, the
64 158142c2 bellard
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
65 158142c2 bellard
| The result is stored in the location pointed to by `zPtr'.
66 158142c2 bellard
*----------------------------------------------------------------------------*/
67 158142c2 bellard
68 158142c2 bellard
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
69 158142c2 bellard
{
70 158142c2 bellard
    bits64 z;
71 158142c2 bellard
72 158142c2 bellard
    if ( count == 0 ) {
73 158142c2 bellard
        z = a;
74 158142c2 bellard
    }
75 158142c2 bellard
    else if ( count < 64 ) {
76 158142c2 bellard
        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
77 158142c2 bellard
    }
78 158142c2 bellard
    else {
79 158142c2 bellard
        z = ( a != 0 );
80 158142c2 bellard
    }
81 158142c2 bellard
    *zPtr = z;
82 158142c2 bellard
83 158142c2 bellard
}
84 158142c2 bellard
85 158142c2 bellard
/*----------------------------------------------------------------------------
86 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
87 158142c2 bellard
| _plus_ the number of bits given in `count'.  The shifted result is at most
88 158142c2 bellard
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
89 158142c2 bellard
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
90 158142c2 bellard
| shifted off is the most-significant bit of the extra result, and the other
91 158142c2 bellard
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
92 158142c2 bellard
| bits shifted off were all zero.  This extra result is stored in the location
93 158142c2 bellard
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
94 158142c2 bellard
|     (This routine makes more sense if `a0' and `a1' are considered to form
95 158142c2 bellard
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
96 158142c2 bellard
| point value is shifted right by the number of bits given in `count', and
97 158142c2 bellard
| the integer part of the result is returned at the location pointed to by
98 158142c2 bellard
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
99 158142c2 bellard
| described above, and is returned at the location pointed to by `z1Ptr'.)
100 158142c2 bellard
*----------------------------------------------------------------------------*/
101 158142c2 bellard
102 158142c2 bellard
INLINE void
103 158142c2 bellard
 shift64ExtraRightJamming(
104 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
105 158142c2 bellard
{
106 158142c2 bellard
    bits64 z0, z1;
107 158142c2 bellard
    int8 negCount = ( - count ) & 63;
108 158142c2 bellard
109 158142c2 bellard
    if ( count == 0 ) {
110 158142c2 bellard
        z1 = a1;
111 158142c2 bellard
        z0 = a0;
112 158142c2 bellard
    }
113 158142c2 bellard
    else if ( count < 64 ) {
114 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1 != 0 );
115 158142c2 bellard
        z0 = a0>>count;
116 158142c2 bellard
    }
117 158142c2 bellard
    else {
118 158142c2 bellard
        if ( count == 64 ) {
119 158142c2 bellard
            z1 = a0 | ( a1 != 0 );
120 158142c2 bellard
        }
121 158142c2 bellard
        else {
122 158142c2 bellard
            z1 = ( ( a0 | a1 ) != 0 );
123 158142c2 bellard
        }
124 158142c2 bellard
        z0 = 0;
125 158142c2 bellard
    }
126 158142c2 bellard
    *z1Ptr = z1;
127 158142c2 bellard
    *z0Ptr = z0;
128 158142c2 bellard
129 158142c2 bellard
}
130 158142c2 bellard
131 158142c2 bellard
/*----------------------------------------------------------------------------
132 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
133 158142c2 bellard
| number of bits given in `count'.  Any bits shifted off are lost.  The value
134 158142c2 bellard
| of `count' can be arbitrarily large; in particular, if `count' is greater
135 158142c2 bellard
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
136 158142c2 bellard
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
137 158142c2 bellard
*----------------------------------------------------------------------------*/
138 158142c2 bellard
139 158142c2 bellard
INLINE void
140 158142c2 bellard
 shift128Right(
141 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
142 158142c2 bellard
{
143 158142c2 bellard
    bits64 z0, z1;
144 158142c2 bellard
    int8 negCount = ( - count ) & 63;
145 158142c2 bellard
146 158142c2 bellard
    if ( count == 0 ) {
147 158142c2 bellard
        z1 = a1;
148 158142c2 bellard
        z0 = a0;
149 158142c2 bellard
    }
150 158142c2 bellard
    else if ( count < 64 ) {
151 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1>>count );
152 158142c2 bellard
        z0 = a0>>count;
153 158142c2 bellard
    }
154 158142c2 bellard
    else {
155 158142c2 bellard
        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
156 158142c2 bellard
        z0 = 0;
157 158142c2 bellard
    }
158 158142c2 bellard
    *z1Ptr = z1;
159 158142c2 bellard
    *z0Ptr = z0;
160 158142c2 bellard
161 158142c2 bellard
}
162 158142c2 bellard
163 158142c2 bellard
/*----------------------------------------------------------------------------
164 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
165 158142c2 bellard
| number of bits given in `count'.  If any nonzero bits are shifted off, they
166 158142c2 bellard
| are ``jammed'' into the least significant bit of the result by setting the
167 158142c2 bellard
| least significant bit to 1.  The value of `count' can be arbitrarily large;
168 158142c2 bellard
| in particular, if `count' is greater than 128, the result will be either
169 158142c2 bellard
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
170 158142c2 bellard
| nonzero.  The result is broken into two 64-bit pieces which are stored at
171 158142c2 bellard
| the locations pointed to by `z0Ptr' and `z1Ptr'.
172 158142c2 bellard
*----------------------------------------------------------------------------*/
173 158142c2 bellard
174 158142c2 bellard
INLINE void
175 158142c2 bellard
 shift128RightJamming(
176 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
177 158142c2 bellard
{
178 158142c2 bellard
    bits64 z0, z1;
179 158142c2 bellard
    int8 negCount = ( - count ) & 63;
180 158142c2 bellard
181 158142c2 bellard
    if ( count == 0 ) {
182 158142c2 bellard
        z1 = a1;
183 158142c2 bellard
        z0 = a0;
184 158142c2 bellard
    }
185 158142c2 bellard
    else if ( count < 64 ) {
186 158142c2 bellard
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
187 158142c2 bellard
        z0 = a0>>count;
188 158142c2 bellard
    }
189 158142c2 bellard
    else {
190 158142c2 bellard
        if ( count == 64 ) {
191 158142c2 bellard
            z1 = a0 | ( a1 != 0 );
192 158142c2 bellard
        }
193 158142c2 bellard
        else if ( count < 128 ) {
194 158142c2 bellard
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
195 158142c2 bellard
        }
196 158142c2 bellard
        else {
197 158142c2 bellard
            z1 = ( ( a0 | a1 ) != 0 );
198 158142c2 bellard
        }
199 158142c2 bellard
        z0 = 0;
200 158142c2 bellard
    }
201 158142c2 bellard
    *z1Ptr = z1;
202 158142c2 bellard
    *z0Ptr = z0;
203 158142c2 bellard
204 158142c2 bellard
}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 158142c2 bellard
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
208 158142c2 bellard
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
209 158142c2 bellard
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
210 158142c2 bellard
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
211 158142c2 bellard
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
212 158142c2 bellard
| the most-significant bit of the extra result, and the other 63 bits of the
213 158142c2 bellard
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
214 158142c2 bellard
| were all zero.  This extra result is stored in the location pointed to by
215 158142c2 bellard
| `z2Ptr'.  The value of `count' can be arbitrarily large.
216 158142c2 bellard
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
217 158142c2 bellard
| to form a fixed-point value with binary point between `a1' and `a2'.  This
218 158142c2 bellard
| fixed-point value is shifted right by the number of bits given in `count',
219 158142c2 bellard
| and the integer part of the result is returned at the locations pointed to
220 158142c2 bellard
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
221 158142c2 bellard
| corrupted as described above, and is returned at the location pointed to by
222 158142c2 bellard
| `z2Ptr'.)
223 158142c2 bellard
*----------------------------------------------------------------------------*/
224 158142c2 bellard
225 158142c2 bellard
INLINE void
226 158142c2 bellard
 shift128ExtraRightJamming(
227 158142c2 bellard
     bits64 a0,
228 158142c2 bellard
     bits64 a1,
229 158142c2 bellard
     bits64 a2,
230 158142c2 bellard
     int16 count,
231 158142c2 bellard
     bits64 *z0Ptr,
232 158142c2 bellard
     bits64 *z1Ptr,
233 158142c2 bellard
     bits64 *z2Ptr
234 158142c2 bellard
 )
235 158142c2 bellard
{
236 158142c2 bellard
    bits64 z0, z1, z2;
237 158142c2 bellard
    int8 negCount = ( - count ) & 63;
238 158142c2 bellard
239 158142c2 bellard
    if ( count == 0 ) {
240 158142c2 bellard
        z2 = a2;
241 158142c2 bellard
        z1 = a1;
242 158142c2 bellard
        z0 = a0;
243 158142c2 bellard
    }
244 158142c2 bellard
    else {
245 158142c2 bellard
        if ( count < 64 ) {
246 158142c2 bellard
            z2 = a1<<negCount;
247 158142c2 bellard
            z1 = ( a0<<negCount ) | ( a1>>count );
248 158142c2 bellard
            z0 = a0>>count;
249 158142c2 bellard
        }
250 158142c2 bellard
        else {
251 158142c2 bellard
            if ( count == 64 ) {
252 158142c2 bellard
                z2 = a1;
253 158142c2 bellard
                z1 = a0;
254 158142c2 bellard
            }
255 158142c2 bellard
            else {
256 158142c2 bellard
                a2 |= a1;
257 158142c2 bellard
                if ( count < 128 ) {
258 158142c2 bellard
                    z2 = a0<<negCount;
259 158142c2 bellard
                    z1 = a0>>( count & 63 );
260 158142c2 bellard
                }
261 158142c2 bellard
                else {
262 158142c2 bellard
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
263 158142c2 bellard
                    z1 = 0;
264 158142c2 bellard
                }
265 158142c2 bellard
            }
266 158142c2 bellard
            z0 = 0;
267 158142c2 bellard
        }
268 158142c2 bellard
        z2 |= ( a2 != 0 );
269 158142c2 bellard
    }
270 158142c2 bellard
    *z2Ptr = z2;
271 158142c2 bellard
    *z1Ptr = z1;
272 158142c2 bellard
    *z0Ptr = z0;
273 158142c2 bellard
274 158142c2 bellard
}
275 158142c2 bellard
276 158142c2 bellard
/*----------------------------------------------------------------------------
277 158142c2 bellard
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
278 158142c2 bellard
| number of bits given in `count'.  Any bits shifted off are lost.  The value
279 158142c2 bellard
| of `count' must be less than 64.  The result is broken into two 64-bit
280 158142c2 bellard
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
281 158142c2 bellard
*----------------------------------------------------------------------------*/
282 158142c2 bellard
283 158142c2 bellard
INLINE void
284 158142c2 bellard
 shortShift128Left(
285 158142c2 bellard
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
286 158142c2 bellard
{
287 158142c2 bellard
288 158142c2 bellard
    *z1Ptr = a1<<count;
289 158142c2 bellard
    *z0Ptr =
290 158142c2 bellard
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
291 158142c2 bellard
292 158142c2 bellard
}
293 158142c2 bellard
294 158142c2 bellard
/*----------------------------------------------------------------------------
295 158142c2 bellard
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
296 158142c2 bellard
| by the number of bits given in `count'.  Any bits shifted off are lost.
297 158142c2 bellard
| The value of `count' must be less than 64.  The result is broken into three
298 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
299 158142c2 bellard
| `z1Ptr', and `z2Ptr'.
300 158142c2 bellard
*----------------------------------------------------------------------------*/
301 158142c2 bellard
302 158142c2 bellard
INLINE void
303 158142c2 bellard
 shortShift192Left(
304 158142c2 bellard
     bits64 a0,
305 158142c2 bellard
     bits64 a1,
306 158142c2 bellard
     bits64 a2,
307 158142c2 bellard
     int16 count,
308 158142c2 bellard
     bits64 *z0Ptr,
309 158142c2 bellard
     bits64 *z1Ptr,
310 158142c2 bellard
     bits64 *z2Ptr
311 158142c2 bellard
 )
312 158142c2 bellard
{
313 158142c2 bellard
    bits64 z0, z1, z2;
314 158142c2 bellard
    int8 negCount;
315 158142c2 bellard
316 158142c2 bellard
    z2 = a2<<count;
317 158142c2 bellard
    z1 = a1<<count;
318 158142c2 bellard
    z0 = a0<<count;
319 158142c2 bellard
    if ( 0 < count ) {
320 158142c2 bellard
        negCount = ( ( - count ) & 63 );
321 158142c2 bellard
        z1 |= a2>>negCount;
322 158142c2 bellard
        z0 |= a1>>negCount;
323 158142c2 bellard
    }
324 158142c2 bellard
    *z2Ptr = z2;
325 158142c2 bellard
    *z1Ptr = z1;
326 158142c2 bellard
    *z0Ptr = z0;
327 158142c2 bellard
328 158142c2 bellard
}
329 158142c2 bellard
330 158142c2 bellard
/*----------------------------------------------------------------------------
331 158142c2 bellard
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
332 158142c2 bellard
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
333 158142c2 bellard
| any carry out is lost.  The result is broken into two 64-bit pieces which
334 158142c2 bellard
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
335 158142c2 bellard
*----------------------------------------------------------------------------*/
336 158142c2 bellard
337 158142c2 bellard
INLINE void
338 158142c2 bellard
 add128(
339 158142c2 bellard
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
340 158142c2 bellard
{
341 158142c2 bellard
    bits64 z1;
342 158142c2 bellard
343 158142c2 bellard
    z1 = a1 + b1;
344 158142c2 bellard
    *z1Ptr = z1;
345 158142c2 bellard
    *z0Ptr = a0 + b0 + ( z1 < a1 );
346 158142c2 bellard
347 158142c2 bellard
}
348 158142c2 bellard
349 158142c2 bellard
/*----------------------------------------------------------------------------
350 158142c2 bellard
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
351 158142c2 bellard
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
352 158142c2 bellard
| modulo 2^192, so any carry out is lost.  The result is broken into three
353 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
354 158142c2 bellard
| `z1Ptr', and `z2Ptr'.
355 158142c2 bellard
*----------------------------------------------------------------------------*/
356 158142c2 bellard
357 158142c2 bellard
INLINE void
358 158142c2 bellard
 add192(
359 158142c2 bellard
     bits64 a0,
360 158142c2 bellard
     bits64 a1,
361 158142c2 bellard
     bits64 a2,
362 158142c2 bellard
     bits64 b0,
363 158142c2 bellard
     bits64 b1,
364 158142c2 bellard
     bits64 b2,
365 158142c2 bellard
     bits64 *z0Ptr,
366 158142c2 bellard
     bits64 *z1Ptr,
367 158142c2 bellard
     bits64 *z2Ptr
368 158142c2 bellard
 )
369 158142c2 bellard
{
370 158142c2 bellard
    bits64 z0, z1, z2;
371 158142c2 bellard
    int8 carry0, carry1;
372 158142c2 bellard
373 158142c2 bellard
    z2 = a2 + b2;
374 158142c2 bellard
    carry1 = ( z2 < a2 );
375 158142c2 bellard
    z1 = a1 + b1;
376 158142c2 bellard
    carry0 = ( z1 < a1 );
377 158142c2 bellard
    z0 = a0 + b0;
378 158142c2 bellard
    z1 += carry1;
379 158142c2 bellard
    z0 += ( z1 < carry1 );
380 158142c2 bellard
    z0 += carry0;
381 158142c2 bellard
    *z2Ptr = z2;
382 158142c2 bellard
    *z1Ptr = z1;
383 158142c2 bellard
    *z0Ptr = z0;
384 158142c2 bellard
385 158142c2 bellard
}
386 158142c2 bellard
387 158142c2 bellard
/*----------------------------------------------------------------------------
388 158142c2 bellard
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
389 158142c2 bellard
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
390 158142c2 bellard
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
391 158142c2 bellard
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
392 158142c2 bellard
| `z1Ptr'.
393 158142c2 bellard
*----------------------------------------------------------------------------*/
394 158142c2 bellard
395 158142c2 bellard
INLINE void
396 158142c2 bellard
 sub128(
397 158142c2 bellard
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
398 158142c2 bellard
{
399 158142c2 bellard
400 158142c2 bellard
    *z1Ptr = a1 - b1;
401 158142c2 bellard
    *z0Ptr = a0 - b0 - ( a1 < b1 );
402 158142c2 bellard
403 158142c2 bellard
}
404 158142c2 bellard
405 158142c2 bellard
/*----------------------------------------------------------------------------
406 158142c2 bellard
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
407 158142c2 bellard
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
408 158142c2 bellard
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
409 158142c2 bellard
| result is broken into three 64-bit pieces which are stored at the locations
410 158142c2 bellard
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
411 158142c2 bellard
*----------------------------------------------------------------------------*/
412 158142c2 bellard
413 158142c2 bellard
INLINE void
414 158142c2 bellard
 sub192(
415 158142c2 bellard
     bits64 a0,
416 158142c2 bellard
     bits64 a1,
417 158142c2 bellard
     bits64 a2,
418 158142c2 bellard
     bits64 b0,
419 158142c2 bellard
     bits64 b1,
420 158142c2 bellard
     bits64 b2,
421 158142c2 bellard
     bits64 *z0Ptr,
422 158142c2 bellard
     bits64 *z1Ptr,
423 158142c2 bellard
     bits64 *z2Ptr
424 158142c2 bellard
 )
425 158142c2 bellard
{
426 158142c2 bellard
    bits64 z0, z1, z2;
427 158142c2 bellard
    int8 borrow0, borrow1;
428 158142c2 bellard
429 158142c2 bellard
    z2 = a2 - b2;
430 158142c2 bellard
    borrow1 = ( a2 < b2 );
431 158142c2 bellard
    z1 = a1 - b1;
432 158142c2 bellard
    borrow0 = ( a1 < b1 );
433 158142c2 bellard
    z0 = a0 - b0;
434 158142c2 bellard
    z0 -= ( z1 < borrow1 );
435 158142c2 bellard
    z1 -= borrow1;
436 158142c2 bellard
    z0 -= borrow0;
437 158142c2 bellard
    *z2Ptr = z2;
438 158142c2 bellard
    *z1Ptr = z1;
439 158142c2 bellard
    *z0Ptr = z0;
440 158142c2 bellard
441 158142c2 bellard
}
442 158142c2 bellard
443 158142c2 bellard
/*----------------------------------------------------------------------------
444 158142c2 bellard
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
445 158142c2 bellard
| into two 64-bit pieces which are stored at the locations pointed to by
446 158142c2 bellard
| `z0Ptr' and `z1Ptr'.
447 158142c2 bellard
*----------------------------------------------------------------------------*/
448 158142c2 bellard
449 158142c2 bellard
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
450 158142c2 bellard
{
451 158142c2 bellard
    bits32 aHigh, aLow, bHigh, bLow;
452 158142c2 bellard
    bits64 z0, zMiddleA, zMiddleB, z1;
453 158142c2 bellard
454 158142c2 bellard
    aLow = a;
455 158142c2 bellard
    aHigh = a>>32;
456 158142c2 bellard
    bLow = b;
457 158142c2 bellard
    bHigh = b>>32;
458 158142c2 bellard
    z1 = ( (bits64) aLow ) * bLow;
459 158142c2 bellard
    zMiddleA = ( (bits64) aLow ) * bHigh;
460 158142c2 bellard
    zMiddleB = ( (bits64) aHigh ) * bLow;
461 158142c2 bellard
    z0 = ( (bits64) aHigh ) * bHigh;
462 158142c2 bellard
    zMiddleA += zMiddleB;
463 158142c2 bellard
    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
464 158142c2 bellard
    zMiddleA <<= 32;
465 158142c2 bellard
    z1 += zMiddleA;
466 158142c2 bellard
    z0 += ( z1 < zMiddleA );
467 158142c2 bellard
    *z1Ptr = z1;
468 158142c2 bellard
    *z0Ptr = z0;
469 158142c2 bellard
470 158142c2 bellard
}
471 158142c2 bellard
472 158142c2 bellard
/*----------------------------------------------------------------------------
473 158142c2 bellard
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
474 158142c2 bellard
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
475 158142c2 bellard
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
476 158142c2 bellard
| `z2Ptr'.
477 158142c2 bellard
*----------------------------------------------------------------------------*/
478 158142c2 bellard
479 158142c2 bellard
INLINE void
480 158142c2 bellard
 mul128By64To192(
481 158142c2 bellard
     bits64 a0,
482 158142c2 bellard
     bits64 a1,
483 158142c2 bellard
     bits64 b,
484 158142c2 bellard
     bits64 *z0Ptr,
485 158142c2 bellard
     bits64 *z1Ptr,
486 158142c2 bellard
     bits64 *z2Ptr
487 158142c2 bellard
 )
488 158142c2 bellard
{
489 158142c2 bellard
    bits64 z0, z1, z2, more1;
490 158142c2 bellard
491 158142c2 bellard
    mul64To128( a1, b, &z1, &z2 );
492 158142c2 bellard
    mul64To128( a0, b, &z0, &more1 );
493 158142c2 bellard
    add128( z0, more1, 0, z1, &z0, &z1 );
494 158142c2 bellard
    *z2Ptr = z2;
495 158142c2 bellard
    *z1Ptr = z1;
496 158142c2 bellard
    *z0Ptr = z0;
497 158142c2 bellard
498 158142c2 bellard
}
499 158142c2 bellard
500 158142c2 bellard
/*----------------------------------------------------------------------------
501 158142c2 bellard
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
502 158142c2 bellard
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
503 158142c2 bellard
| product.  The product is broken into four 64-bit pieces which are stored at
504 158142c2 bellard
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
505 158142c2 bellard
*----------------------------------------------------------------------------*/
506 158142c2 bellard
507 158142c2 bellard
INLINE void
508 158142c2 bellard
 mul128To256(
509 158142c2 bellard
     bits64 a0,
510 158142c2 bellard
     bits64 a1,
511 158142c2 bellard
     bits64 b0,
512 158142c2 bellard
     bits64 b1,
513 158142c2 bellard
     bits64 *z0Ptr,
514 158142c2 bellard
     bits64 *z1Ptr,
515 158142c2 bellard
     bits64 *z2Ptr,
516 158142c2 bellard
     bits64 *z3Ptr
517 158142c2 bellard
 )
518 158142c2 bellard
{
519 158142c2 bellard
    bits64 z0, z1, z2, z3;
520 158142c2 bellard
    bits64 more1, more2;
521 158142c2 bellard
522 158142c2 bellard
    mul64To128( a1, b1, &z2, &z3 );
523 158142c2 bellard
    mul64To128( a1, b0, &z1, &more2 );
524 158142c2 bellard
    add128( z1, more2, 0, z2, &z1, &z2 );
525 158142c2 bellard
    mul64To128( a0, b0, &z0, &more1 );
526 158142c2 bellard
    add128( z0, more1, 0, z1, &z0, &z1 );
527 158142c2 bellard
    mul64To128( a0, b1, &more1, &more2 );
528 158142c2 bellard
    add128( more1, more2, 0, z2, &more1, &z2 );
529 158142c2 bellard
    add128( z0, z1, 0, more1, &z0, &z1 );
530 158142c2 bellard
    *z3Ptr = z3;
531 158142c2 bellard
    *z2Ptr = z2;
532 158142c2 bellard
    *z1Ptr = z1;
533 158142c2 bellard
    *z0Ptr = z0;
534 158142c2 bellard
535 158142c2 bellard
}
536 158142c2 bellard
537 158142c2 bellard
/*----------------------------------------------------------------------------
538 158142c2 bellard
| Returns an approximation to the 64-bit integer quotient obtained by dividing
539 158142c2 bellard
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
540 158142c2 bellard
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
541 158142c2 bellard
| toward zero, the approximation returned lies between q and q + 2 inclusive.
542 158142c2 bellard
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
543 158142c2 bellard
| unsigned integer is returned.
544 158142c2 bellard
*----------------------------------------------------------------------------*/
545 158142c2 bellard
546 158142c2 bellard
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
547 158142c2 bellard
{
548 158142c2 bellard
    bits64 b0, b1;
549 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
550 158142c2 bellard
    bits64 z;
551 158142c2 bellard
552 158142c2 bellard
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
553 158142c2 bellard
    b0 = b>>32;
554 158142c2 bellard
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
555 158142c2 bellard
    mul64To128( b, z, &term0, &term1 );
556 158142c2 bellard
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
557 158142c2 bellard
    while ( ( (sbits64) rem0 ) < 0 ) {
558 158142c2 bellard
        z -= LIT64( 0x100000000 );
559 158142c2 bellard
        b1 = b<<32;
560 158142c2 bellard
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
561 158142c2 bellard
    }
562 158142c2 bellard
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
563 158142c2 bellard
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
564 158142c2 bellard
    return z;
565 158142c2 bellard
566 158142c2 bellard
}
567 158142c2 bellard
568 158142c2 bellard
/*----------------------------------------------------------------------------
569 158142c2 bellard
| Returns an approximation to the square root of the 32-bit significand given
570 158142c2 bellard
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
571 158142c2 bellard
| `aExp' (the least significant bit) is 1, the integer returned approximates
572 158142c2 bellard
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
573 158142c2 bellard
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
574 158142c2 bellard
| case, the approximation returned lies strictly within +/-2 of the exact
575 158142c2 bellard
| value.
576 158142c2 bellard
*----------------------------------------------------------------------------*/
577 158142c2 bellard
578 158142c2 bellard
static bits32 estimateSqrt32( int16 aExp, bits32 a )
579 158142c2 bellard
{
580 158142c2 bellard
    static const bits16 sqrtOddAdjustments[] = {
581 158142c2 bellard
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
582 158142c2 bellard
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
583 158142c2 bellard
    };
584 158142c2 bellard
    static const bits16 sqrtEvenAdjustments[] = {
585 158142c2 bellard
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
586 158142c2 bellard
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
587 158142c2 bellard
    };
588 158142c2 bellard
    int8 index;
589 158142c2 bellard
    bits32 z;
590 158142c2 bellard
591 158142c2 bellard
    index = ( a>>27 ) & 15;
592 158142c2 bellard
    if ( aExp & 1 ) {
593 158142c2 bellard
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
594 158142c2 bellard
        z = ( ( a / z )<<14 ) + ( z<<15 );
595 158142c2 bellard
        a >>= 1;
596 158142c2 bellard
    }
597 158142c2 bellard
    else {
598 158142c2 bellard
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
599 158142c2 bellard
        z = a / z + z;
600 158142c2 bellard
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
601 158142c2 bellard
        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
602 158142c2 bellard
    }
603 158142c2 bellard
    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
604 158142c2 bellard
605 158142c2 bellard
}
606 158142c2 bellard
607 158142c2 bellard
/*----------------------------------------------------------------------------
608 158142c2 bellard
| Returns the number of leading 0 bits before the most-significant 1 bit of
609 158142c2 bellard
| `a'.  If `a' is zero, 32 is returned.
610 158142c2 bellard
*----------------------------------------------------------------------------*/
611 158142c2 bellard
612 158142c2 bellard
static int8 countLeadingZeros32( bits32 a )
613 158142c2 bellard
{
614 158142c2 bellard
    static const int8 countLeadingZerosHigh[] = {
615 158142c2 bellard
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
616 158142c2 bellard
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
617 158142c2 bellard
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
618 158142c2 bellard
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
619 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
620 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
621 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
622 158142c2 bellard
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
623 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
624 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
625 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
627 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 158142c2 bellard
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
631 158142c2 bellard
    };
632 158142c2 bellard
    int8 shiftCount;
633 158142c2 bellard
634 158142c2 bellard
    shiftCount = 0;
635 158142c2 bellard
    if ( a < 0x10000 ) {
636 158142c2 bellard
        shiftCount += 16;
637 158142c2 bellard
        a <<= 16;
638 158142c2 bellard
    }
639 158142c2 bellard
    if ( a < 0x1000000 ) {
640 158142c2 bellard
        shiftCount += 8;
641 158142c2 bellard
        a <<= 8;
642 158142c2 bellard
    }
643 158142c2 bellard
    shiftCount += countLeadingZerosHigh[ a>>24 ];
644 158142c2 bellard
    return shiftCount;
645 158142c2 bellard
646 158142c2 bellard
}
647 158142c2 bellard
648 158142c2 bellard
/*----------------------------------------------------------------------------
649 158142c2 bellard
| Returns the number of leading 0 bits before the most-significant 1 bit of
650 158142c2 bellard
| `a'.  If `a' is zero, 64 is returned.
651 158142c2 bellard
*----------------------------------------------------------------------------*/
652 158142c2 bellard
653 158142c2 bellard
static int8 countLeadingZeros64( bits64 a )
654 158142c2 bellard
{
655 158142c2 bellard
    int8 shiftCount;
656 158142c2 bellard
657 158142c2 bellard
    shiftCount = 0;
658 158142c2 bellard
    if ( a < ( (bits64) 1 )<<32 ) {
659 158142c2 bellard
        shiftCount += 32;
660 158142c2 bellard
    }
661 158142c2 bellard
    else {
662 158142c2 bellard
        a >>= 32;
663 158142c2 bellard
    }
664 158142c2 bellard
    shiftCount += countLeadingZeros32( a );
665 158142c2 bellard
    return shiftCount;
666 158142c2 bellard
667 158142c2 bellard
}
668 158142c2 bellard
669 158142c2 bellard
/*----------------------------------------------------------------------------
670 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
671 158142c2 bellard
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
672 158142c2 bellard
| Otherwise, returns 0.
673 158142c2 bellard
*----------------------------------------------------------------------------*/
674 158142c2 bellard
675 158142c2 bellard
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
676 158142c2 bellard
{
677 158142c2 bellard
678 158142c2 bellard
    return ( a0 == b0 ) && ( a1 == b1 );
679 158142c2 bellard
680 158142c2 bellard
}
681 158142c2 bellard
682 158142c2 bellard
/*----------------------------------------------------------------------------
683 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
684 158142c2 bellard
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
685 158142c2 bellard
| Otherwise, returns 0.
686 158142c2 bellard
*----------------------------------------------------------------------------*/
687 158142c2 bellard
688 158142c2 bellard
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
689 158142c2 bellard
{
690 158142c2 bellard
691 158142c2 bellard
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
692 158142c2 bellard
693 158142c2 bellard
}
694 158142c2 bellard
695 158142c2 bellard
/*----------------------------------------------------------------------------
696 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
697 158142c2 bellard
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
698 158142c2 bellard
| returns 0.
699 158142c2 bellard
*----------------------------------------------------------------------------*/
700 158142c2 bellard
701 158142c2 bellard
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
702 158142c2 bellard
{
703 158142c2 bellard
704 158142c2 bellard
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
705 158142c2 bellard
706 158142c2 bellard
}
707 158142c2 bellard
708 158142c2 bellard
/*----------------------------------------------------------------------------
709 158142c2 bellard
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
710 158142c2 bellard
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
711 158142c2 bellard
| Otherwise, returns 0.
712 158142c2 bellard
*----------------------------------------------------------------------------*/
713 158142c2 bellard
714 158142c2 bellard
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
715 158142c2 bellard
{
716 158142c2 bellard
717 158142c2 bellard
    return ( a0 != b0 ) || ( a1 != b1 );
718 158142c2 bellard
719 158142c2 bellard
}