root / target-arm / nwfpe / softfloat-macros @ a8d3431a
History | View | Annotate | Download (23.5 kB)
1 |
|
---|---|
2 |
/* |
3 |
=============================================================================== |
4 |
|
5 |
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point |
6 |
Arithmetic Package, Release 2. |
7 |
|
8 |
Written by John R. Hauser. This work was made possible in part by the |
9 |
International Computer Science Institute, located at Suite 600, 1947 Center |
10 |
Street, Berkeley, California 94704. Funding was partially provided by the |
11 |
National Science Foundation under grant MIP-9311980. The original version |
12 |
of this code was written as part of a project to build a fixed-point vector |
13 |
processor in collaboration with the University of California at Berkeley, |
14 |
overseen by Profs. Nelson Morgan and John Wawrzynek. More information |
15 |
is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ |
16 |
arithmetic/softfloat.html'. |
17 |
|
18 |
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort |
19 |
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT |
20 |
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO |
21 |
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY |
22 |
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. |
23 |
|
24 |
Derivative works are acceptable, even for commercial purposes, so long as |
25 |
(1) they include prominent notice that the work is derivative, and (2) they |
26 |
include prominent notice akin to these three paragraphs for those parts of |
27 |
this code that are retained. |
28 |
|
29 |
=============================================================================== |
30 |
*/ |
31 |
|
32 |
/* |
33 |
------------------------------------------------------------------------------- |
34 |
Shifts `a' right by the number of bits given in `count'. If any nonzero |
35 |
bits are shifted off, they are ``jammed'' into the least significant bit of |
36 |
the result by setting the least significant bit to 1. The value of `count' |
37 |
can be arbitrarily large; in particular, if `count' is greater than 32, the |
38 |
result will be either 0 or 1, depending on whether `a' is zero or nonzero. |
39 |
The result is stored in the location pointed to by `zPtr'. |
40 |
------------------------------------------------------------------------------- |
41 |
*/ |
42 |
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) |
43 |
{ |
44 |
bits32 z; |
45 |
if ( count == 0 ) { |
46 |
z = a; |
47 |
} |
48 |
else if ( count < 32 ) { |
49 |
z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); |
50 |
} |
51 |
else { |
52 |
z = ( a != 0 ); |
53 |
} |
54 |
*zPtr = z; |
55 |
} |
56 |
|
57 |
/* |
58 |
------------------------------------------------------------------------------- |
59 |
Shifts `a' right by the number of bits given in `count'. If any nonzero |
60 |
bits are shifted off, they are ``jammed'' into the least significant bit of |
61 |
the result by setting the least significant bit to 1. The value of `count' |
62 |
can be arbitrarily large; in particular, if `count' is greater than 64, the |
63 |
result will be either 0 or 1, depending on whether `a' is zero or nonzero. |
64 |
The result is stored in the location pointed to by `zPtr'. |
65 |
------------------------------------------------------------------------------- |
66 |
*/ |
67 |
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) |
68 |
{ |
69 |
bits64 z; |
70 |
|
71 |
// __asm__("@shift64RightJamming -- start"); |
72 |
if ( count == 0 ) { |
73 |
z = a; |
74 |
} |
75 |
else if ( count < 64 ) { |
76 |
z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); |
77 |
} |
78 |
else { |
79 |
z = ( a != 0 ); |
80 |
} |
81 |
// __asm__("@shift64RightJamming -- end"); |
82 |
*zPtr = z; |
83 |
} |
84 |
|
85 |
/* |
86 |
------------------------------------------------------------------------------- |
87 |
Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 |
88 |
_plus_ the number of bits given in `count'. The shifted result is at most |
89 |
64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The |
90 |
bits shifted off form a second 64-bit result as follows: The _last_ bit |
91 |
shifted off is the most-significant bit of the extra result, and the other |
92 |
63 bits of the extra result are all zero if and only if _all_but_the_last_ |
93 |
bits shifted off were all zero. This extra result is stored in the location |
94 |
pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. |
95 |
(This routine makes more sense if `a0' and `a1' are considered to form a |
96 |
fixed-point value with binary point between `a0' and `a1'. This fixed-point |
97 |
value is shifted right by the number of bits given in `count', and the |
98 |
integer part of the result is returned at the location pointed to by |
99 |
`z0Ptr'. The fractional part of the result may be slightly corrupted as |
100 |
described above, and is returned at the location pointed to by `z1Ptr'.) |
101 |
------------------------------------------------------------------------------- |
102 |
*/ |
103 |
INLINE void |
104 |
shift64ExtraRightJamming( |
105 |
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
106 |
{ |
107 |
bits64 z0, z1; |
108 |
int8 negCount = ( - count ) & 63; |
109 |
|
110 |
if ( count == 0 ) { |
111 |
z1 = a1; |
112 |
z0 = a0; |
113 |
} |
114 |
else if ( count < 64 ) { |
115 |
z1 = ( a0<<negCount ) | ( a1 != 0 ); |
116 |
z0 = a0>>count; |
117 |
} |
118 |
else { |
119 |
if ( count == 64 ) { |
120 |
z1 = a0 | ( a1 != 0 ); |
121 |
} |
122 |
else { |
123 |
z1 = ( ( a0 | a1 ) != 0 ); |
124 |
} |
125 |
z0 = 0; |
126 |
} |
127 |
*z1Ptr = z1; |
128 |
*z0Ptr = z0; |
129 |
|
130 |
} |
131 |
|
132 |
/* |
133 |
------------------------------------------------------------------------------- |
134 |
Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the |
135 |
number of bits given in `count'. Any bits shifted off are lost. The value |
136 |
of `count' can be arbitrarily large; in particular, if `count' is greater |
137 |
than 128, the result will be 0. The result is broken into two 64-bit pieces |
138 |
which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
139 |
------------------------------------------------------------------------------- |
140 |
*/ |
141 |
INLINE void |
142 |
shift128Right( |
143 |
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
144 |
{ |
145 |
bits64 z0, z1; |
146 |
int8 negCount = ( - count ) & 63; |
147 |
|
148 |
if ( count == 0 ) { |
149 |
z1 = a1; |
150 |
z0 = a0; |
151 |
} |
152 |
else if ( count < 64 ) { |
153 |
z1 = ( a0<<negCount ) | ( a1>>count ); |
154 |
z0 = a0>>count; |
155 |
} |
156 |
else { |
157 |
z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; |
158 |
z0 = 0; |
159 |
} |
160 |
*z1Ptr = z1; |
161 |
*z0Ptr = z0; |
162 |
|
163 |
} |
164 |
|
165 |
/* |
166 |
------------------------------------------------------------------------------- |
167 |
Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the |
168 |
number of bits given in `count'. If any nonzero bits are shifted off, they |
169 |
are ``jammed'' into the least significant bit of the result by setting the |
170 |
least significant bit to 1. The value of `count' can be arbitrarily large; |
171 |
in particular, if `count' is greater than 128, the result will be either 0 |
172 |
or 1, depending on whether the concatenation of `a0' and `a1' is zero or |
173 |
nonzero. The result is broken into two 64-bit pieces which are stored at |
174 |
the locations pointed to by `z0Ptr' and `z1Ptr'. |
175 |
------------------------------------------------------------------------------- |
176 |
*/ |
177 |
INLINE void |
178 |
shift128RightJamming( |
179 |
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
180 |
{ |
181 |
bits64 z0, z1; |
182 |
int8 negCount = ( - count ) & 63; |
183 |
|
184 |
if ( count == 0 ) { |
185 |
z1 = a1; |
186 |
z0 = a0; |
187 |
} |
188 |
else if ( count < 64 ) { |
189 |
z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); |
190 |
z0 = a0>>count; |
191 |
} |
192 |
else { |
193 |
if ( count == 64 ) { |
194 |
z1 = a0 | ( a1 != 0 ); |
195 |
} |
196 |
else if ( count < 128 ) { |
197 |
z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); |
198 |
} |
199 |
else { |
200 |
z1 = ( ( a0 | a1 ) != 0 ); |
201 |
} |
202 |
z0 = 0; |
203 |
} |
204 |
*z1Ptr = z1; |
205 |
*z0Ptr = z0; |
206 |
|
207 |
} |
208 |
|
209 |
/* |
210 |
------------------------------------------------------------------------------- |
211 |
Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right |
212 |
by 64 _plus_ the number of bits given in `count'. The shifted result is |
213 |
at most 128 nonzero bits; these are broken into two 64-bit pieces which are |
214 |
stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted |
215 |
off form a third 64-bit result as follows: The _last_ bit shifted off is |
216 |
the most-significant bit of the extra result, and the other 63 bits of the |
217 |
extra result are all zero if and only if _all_but_the_last_ bits shifted off |
218 |
were all zero. This extra result is stored in the location pointed to by |
219 |
`z2Ptr'. The value of `count' can be arbitrarily large. |
220 |
(This routine makes more sense if `a0', `a1', and `a2' are considered |
221 |
to form a fixed-point value with binary point between `a1' and `a2'. This |
222 |
fixed-point value is shifted right by the number of bits given in `count', |
223 |
and the integer part of the result is returned at the locations pointed to |
224 |
by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly |
225 |
corrupted as described above, and is returned at the location pointed to by |
226 |
`z2Ptr'.) |
227 |
------------------------------------------------------------------------------- |
228 |
*/ |
229 |
INLINE void |
230 |
shift128ExtraRightJamming( |
231 |
bits64 a0, |
232 |
bits64 a1, |
233 |
bits64 a2, |
234 |
int16 count, |
235 |
bits64 *z0Ptr, |
236 |
bits64 *z1Ptr, |
237 |
bits64 *z2Ptr |
238 |
) |
239 |
{ |
240 |
bits64 z0, z1, z2; |
241 |
int8 negCount = ( - count ) & 63; |
242 |
|
243 |
if ( count == 0 ) { |
244 |
z2 = a2; |
245 |
z1 = a1; |
246 |
z0 = a0; |
247 |
} |
248 |
else { |
249 |
if ( count < 64 ) { |
250 |
z2 = a1<<negCount; |
251 |
z1 = ( a0<<negCount ) | ( a1>>count ); |
252 |
z0 = a0>>count; |
253 |
} |
254 |
else { |
255 |
if ( count == 64 ) { |
256 |
z2 = a1; |
257 |
z1 = a0; |
258 |
} |
259 |
else { |
260 |
a2 |= a1; |
261 |
if ( count < 128 ) { |
262 |
z2 = a0<<negCount; |
263 |
z1 = a0>>( count & 63 ); |
264 |
} |
265 |
else { |
266 |
z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); |
267 |
z1 = 0; |
268 |
} |
269 |
} |
270 |
z0 = 0; |
271 |
} |
272 |
z2 |= ( a2 != 0 ); |
273 |
} |
274 |
*z2Ptr = z2; |
275 |
*z1Ptr = z1; |
276 |
*z0Ptr = z0; |
277 |
|
278 |
} |
279 |
|
280 |
/* |
281 |
------------------------------------------------------------------------------- |
282 |
Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the |
283 |
number of bits given in `count'. Any bits shifted off are lost. The value |
284 |
of `count' must be less than 64. The result is broken into two 64-bit |
285 |
pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
286 |
------------------------------------------------------------------------------- |
287 |
*/ |
288 |
INLINE void |
289 |
shortShift128Left( |
290 |
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
291 |
{ |
292 |
|
293 |
*z1Ptr = a1<<count; |
294 |
*z0Ptr = |
295 |
( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); |
296 |
|
297 |
} |
298 |
|
299 |
/* |
300 |
------------------------------------------------------------------------------- |
301 |
Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left |
302 |
by the number of bits given in `count'. Any bits shifted off are lost. |
303 |
The value of `count' must be less than 64. The result is broken into three |
304 |
64-bit pieces which are stored at the locations pointed to by `z0Ptr', |
305 |
`z1Ptr', and `z2Ptr'. |
306 |
------------------------------------------------------------------------------- |
307 |
*/ |
308 |
INLINE void |
309 |
shortShift192Left( |
310 |
bits64 a0, |
311 |
bits64 a1, |
312 |
bits64 a2, |
313 |
int16 count, |
314 |
bits64 *z0Ptr, |
315 |
bits64 *z1Ptr, |
316 |
bits64 *z2Ptr |
317 |
) |
318 |
{ |
319 |
bits64 z0, z1, z2; |
320 |
int8 negCount; |
321 |
|
322 |
z2 = a2<<count; |
323 |
z1 = a1<<count; |
324 |
z0 = a0<<count; |
325 |
if ( 0 < count ) { |
326 |
negCount = ( ( - count ) & 63 ); |
327 |
z1 |= a2>>negCount; |
328 |
z0 |= a1>>negCount; |
329 |
} |
330 |
*z2Ptr = z2; |
331 |
*z1Ptr = z1; |
332 |
*z0Ptr = z0; |
333 |
|
334 |
} |
335 |
|
336 |
/* |
337 |
------------------------------------------------------------------------------- |
338 |
Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit |
339 |
value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so |
340 |
any carry out is lost. The result is broken into two 64-bit pieces which |
341 |
are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
342 |
------------------------------------------------------------------------------- |
343 |
*/ |
344 |
INLINE void |
345 |
add128( |
346 |
bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) |
347 |
{ |
348 |
bits64 z1; |
349 |
|
350 |
z1 = a1 + b1; |
351 |
*z1Ptr = z1; |
352 |
*z0Ptr = a0 + b0 + ( z1 < a1 ); |
353 |
|
354 |
} |
355 |
|
356 |
/* |
357 |
------------------------------------------------------------------------------- |
358 |
Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the |
359 |
192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is |
360 |
modulo 2^192, so any carry out is lost. The result is broken into three |
361 |
64-bit pieces which are stored at the locations pointed to by `z0Ptr', |
362 |
`z1Ptr', and `z2Ptr'. |
363 |
------------------------------------------------------------------------------- |
364 |
*/ |
365 |
INLINE void |
366 |
add192( |
367 |
bits64 a0, |
368 |
bits64 a1, |
369 |
bits64 a2, |
370 |
bits64 b0, |
371 |
bits64 b1, |
372 |
bits64 b2, |
373 |
bits64 *z0Ptr, |
374 |
bits64 *z1Ptr, |
375 |
bits64 *z2Ptr |
376 |
) |
377 |
{ |
378 |
bits64 z0, z1, z2; |
379 |
int8 carry0, carry1; |
380 |
|
381 |
z2 = a2 + b2; |
382 |
carry1 = ( z2 < a2 ); |
383 |
z1 = a1 + b1; |
384 |
carry0 = ( z1 < a1 ); |
385 |
z0 = a0 + b0; |
386 |
z1 += carry1; |
387 |
z0 += ( z1 < carry1 ); |
388 |
z0 += carry0; |
389 |
*z2Ptr = z2; |
390 |
*z1Ptr = z1; |
391 |
*z0Ptr = z0; |
392 |
|
393 |
} |
394 |
|
395 |
/* |
396 |
------------------------------------------------------------------------------- |
397 |
Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the |
398 |
128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo |
399 |
2^128, so any borrow out (carry out) is lost. The result is broken into two |
400 |
64-bit pieces which are stored at the locations pointed to by `z0Ptr' and |
401 |
`z1Ptr'. |
402 |
------------------------------------------------------------------------------- |
403 |
*/ |
404 |
INLINE void |
405 |
sub128( |
406 |
bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) |
407 |
{ |
408 |
|
409 |
*z1Ptr = a1 - b1; |
410 |
*z0Ptr = a0 - b0 - ( a1 < b1 ); |
411 |
|
412 |
} |
413 |
|
414 |
/* |
415 |
------------------------------------------------------------------------------- |
416 |
Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' |
417 |
from the 192-bit value formed by concatenating `a0', `a1', and `a2'. |
418 |
Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The |
419 |
result is broken into three 64-bit pieces which are stored at the locations |
420 |
pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. |
421 |
------------------------------------------------------------------------------- |
422 |
*/ |
423 |
INLINE void |
424 |
sub192( |
425 |
bits64 a0, |
426 |
bits64 a1, |
427 |
bits64 a2, |
428 |
bits64 b0, |
429 |
bits64 b1, |
430 |
bits64 b2, |
431 |
bits64 *z0Ptr, |
432 |
bits64 *z1Ptr, |
433 |
bits64 *z2Ptr |
434 |
) |
435 |
{ |
436 |
bits64 z0, z1, z2; |
437 |
int8 borrow0, borrow1; |
438 |
|
439 |
z2 = a2 - b2; |
440 |
borrow1 = ( a2 < b2 ); |
441 |
z1 = a1 - b1; |
442 |
borrow0 = ( a1 < b1 ); |
443 |
z0 = a0 - b0; |
444 |
z0 -= ( z1 < borrow1 ); |
445 |
z1 -= borrow1; |
446 |
z0 -= borrow0; |
447 |
*z2Ptr = z2; |
448 |
*z1Ptr = z1; |
449 |
*z0Ptr = z0; |
450 |
|
451 |
} |
452 |
|
453 |
/* |
454 |
------------------------------------------------------------------------------- |
455 |
Multiplies `a' by `b' to obtain a 128-bit product. The product is broken |
456 |
into two 64-bit pieces which are stored at the locations pointed to by |
457 |
`z0Ptr' and `z1Ptr'. |
458 |
------------------------------------------------------------------------------- |
459 |
*/ |
460 |
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) |
461 |
{ |
462 |
bits32 aHigh, aLow, bHigh, bLow; |
463 |
bits64 z0, zMiddleA, zMiddleB, z1; |
464 |
|
465 |
aLow = a; |
466 |
aHigh = a>>32; |
467 |
bLow = b; |
468 |
bHigh = b>>32; |
469 |
z1 = ( (bits64) aLow ) * bLow; |
470 |
zMiddleA = ( (bits64) aLow ) * bHigh; |
471 |
zMiddleB = ( (bits64) aHigh ) * bLow; |
472 |
z0 = ( (bits64) aHigh ) * bHigh; |
473 |
zMiddleA += zMiddleB; |
474 |
z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); |
475 |
zMiddleA <<= 32; |
476 |
z1 += zMiddleA; |
477 |
z0 += ( z1 < zMiddleA ); |
478 |
*z1Ptr = z1; |
479 |
*z0Ptr = z0; |
480 |
|
481 |
} |
482 |
|
483 |
/* |
484 |
------------------------------------------------------------------------------- |
485 |
Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to |
486 |
obtain a 192-bit product. The product is broken into three 64-bit pieces |
487 |
which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and |
488 |
`z2Ptr'. |
489 |
------------------------------------------------------------------------------- |
490 |
*/ |
491 |
INLINE void |
492 |
mul128By64To192( |
493 |
bits64 a0, |
494 |
bits64 a1, |
495 |
bits64 b, |
496 |
bits64 *z0Ptr, |
497 |
bits64 *z1Ptr, |
498 |
bits64 *z2Ptr |
499 |
) |
500 |
{ |
501 |
bits64 z0, z1, z2, more1; |
502 |
|
503 |
mul64To128( a1, b, &z1, &z2 ); |
504 |
mul64To128( a0, b, &z0, &more1 ); |
505 |
add128( z0, more1, 0, z1, &z0, &z1 ); |
506 |
*z2Ptr = z2; |
507 |
*z1Ptr = z1; |
508 |
*z0Ptr = z0; |
509 |
|
510 |
} |
511 |
|
512 |
/* |
513 |
------------------------------------------------------------------------------- |
514 |
Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the |
515 |
128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit |
516 |
product. The product is broken into four 64-bit pieces which are stored at |
517 |
the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. |
518 |
------------------------------------------------------------------------------- |
519 |
*/ |
520 |
INLINE void |
521 |
mul128To256( |
522 |
bits64 a0, |
523 |
bits64 a1, |
524 |
bits64 b0, |
525 |
bits64 b1, |
526 |
bits64 *z0Ptr, |
527 |
bits64 *z1Ptr, |
528 |
bits64 *z2Ptr, |
529 |
bits64 *z3Ptr |
530 |
) |
531 |
{ |
532 |
bits64 z0, z1, z2, z3; |
533 |
bits64 more1, more2; |
534 |
|
535 |
mul64To128( a1, b1, &z2, &z3 ); |
536 |
mul64To128( a1, b0, &z1, &more2 ); |
537 |
add128( z1, more2, 0, z2, &z1, &z2 ); |
538 |
mul64To128( a0, b0, &z0, &more1 ); |
539 |
add128( z0, more1, 0, z1, &z0, &z1 ); |
540 |
mul64To128( a0, b1, &more1, &more2 ); |
541 |
add128( more1, more2, 0, z2, &more1, &z2 ); |
542 |
add128( z0, z1, 0, more1, &z0, &z1 ); |
543 |
*z3Ptr = z3; |
544 |
*z2Ptr = z2; |
545 |
*z1Ptr = z1; |
546 |
*z0Ptr = z0; |
547 |
|
548 |
} |
549 |
|
550 |
/* |
551 |
------------------------------------------------------------------------------- |
552 |
Returns an approximation to the 64-bit integer quotient obtained by dividing |
553 |
`b' into the 128-bit value formed by concatenating `a0' and `a1'. The |
554 |
divisor `b' must be at least 2^63. If q is the exact quotient truncated |
555 |
toward zero, the approximation returned lies between q and q + 2 inclusive. |
556 |
If the exact quotient q is larger than 64 bits, the maximum positive 64-bit |
557 |
unsigned integer is returned. |
558 |
------------------------------------------------------------------------------- |
559 |
*/ |
560 |
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) |
561 |
{ |
562 |
bits64 b0, b1; |
563 |
bits64 rem0, rem1, term0, term1; |
564 |
bits64 z; |
565 |
if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); |
566 |
b0 = b>>32; |
567 |
z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; |
568 |
mul64To128( b, z, &term0, &term1 ); |
569 |
sub128( a0, a1, term0, term1, &rem0, &rem1 ); |
570 |
while ( ( (sbits64) rem0 ) < 0 ) { |
571 |
z -= LIT64( 0x100000000 ); |
572 |
b1 = b<<32; |
573 |
add128( rem0, rem1, b0, b1, &rem0, &rem1 ); |
574 |
} |
575 |
rem0 = ( rem0<<32 ) | ( rem1>>32 ); |
576 |
z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; |
577 |
return z; |
578 |
|
579 |
} |
580 |
|
581 |
/* |
582 |
------------------------------------------------------------------------------- |
583 |
Returns an approximation to the square root of the 32-bit significand given |
584 |
by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of |
585 |
`aExp' (the least significant bit) is 1, the integer returned approximates |
586 |
2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' |
587 |
is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either |
588 |
case, the approximation returned lies strictly within +/-2 of the exact |
589 |
value. |
590 |
------------------------------------------------------------------------------- |
591 |
*/ |
592 |
static bits32 estimateSqrt32( int16 aExp, bits32 a ) |
593 |
{ |
594 |
static const bits16 sqrtOddAdjustments[] = { |
595 |
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, |
596 |
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 |
597 |
}; |
598 |
static const bits16 sqrtEvenAdjustments[] = { |
599 |
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, |
600 |
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 |
601 |
}; |
602 |
int8 index; |
603 |
bits32 z; |
604 |
|
605 |
index = ( a>>27 ) & 15; |
606 |
if ( aExp & 1 ) { |
607 |
z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; |
608 |
z = ( ( a / z )<<14 ) + ( z<<15 ); |
609 |
a >>= 1; |
610 |
} |
611 |
else { |
612 |
z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; |
613 |
z = a / z + z; |
614 |
z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); |
615 |
if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); |
616 |
} |
617 |
return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); |
618 |
|
619 |
} |
620 |
|
621 |
/* |
622 |
------------------------------------------------------------------------------- |
623 |
Returns the number of leading 0 bits before the most-significant 1 bit |
624 |
of `a'. If `a' is zero, 32 is returned. |
625 |
------------------------------------------------------------------------------- |
626 |
*/ |
627 |
static int8 countLeadingZeros32( bits32 a ) |
628 |
{ |
629 |
static const int8 countLeadingZerosHigh[] = { |
630 |
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, |
631 |
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
632 |
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
633 |
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
634 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
635 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
636 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
637 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
638 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
639 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
640 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
641 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
642 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
643 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
644 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
645 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
646 |
}; |
647 |
int8 shiftCount; |
648 |
|
649 |
shiftCount = 0; |
650 |
if ( a < 0x10000 ) { |
651 |
shiftCount += 16; |
652 |
a <<= 16; |
653 |
} |
654 |
if ( a < 0x1000000 ) { |
655 |
shiftCount += 8; |
656 |
a <<= 8; |
657 |
} |
658 |
shiftCount += countLeadingZerosHigh[ a>>24 ]; |
659 |
return shiftCount; |
660 |
|
661 |
} |
662 |
|
663 |
/* |
664 |
------------------------------------------------------------------------------- |
665 |
Returns the number of leading 0 bits before the most-significant 1 bit |
666 |
of `a'. If `a' is zero, 64 is returned. |
667 |
------------------------------------------------------------------------------- |
668 |
*/ |
669 |
static int8 countLeadingZeros64( bits64 a ) |
670 |
{ |
671 |
int8 shiftCount; |
672 |
|
673 |
shiftCount = 0; |
674 |
if ( a < ( (bits64) 1 )<<32 ) { |
675 |
shiftCount += 32; |
676 |
} |
677 |
else { |
678 |
a >>= 32; |
679 |
} |
680 |
shiftCount += countLeadingZeros32( a ); |
681 |
return shiftCount; |
682 |
|
683 |
} |
684 |
|
685 |
/* |
686 |
------------------------------------------------------------------------------- |
687 |
Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' |
688 |
is equal to the 128-bit value formed by concatenating `b0' and `b1'. |
689 |
Otherwise, returns 0. |
690 |
------------------------------------------------------------------------------- |
691 |
*/ |
692 |
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
693 |
{ |
694 |
|
695 |
return ( a0 == b0 ) && ( a1 == b1 ); |
696 |
|
697 |
} |
698 |
|
699 |
/* |
700 |
------------------------------------------------------------------------------- |
701 |
Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less |
702 |
than or equal to the 128-bit value formed by concatenating `b0' and `b1'. |
703 |
Otherwise, returns 0. |
704 |
------------------------------------------------------------------------------- |
705 |
*/ |
706 |
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
707 |
{ |
708 |
|
709 |
return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); |
710 |
|
711 |
} |
712 |
|
713 |
/* |
714 |
------------------------------------------------------------------------------- |
715 |
Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less |
716 |
than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, |
717 |
returns 0. |
718 |
------------------------------------------------------------------------------- |
719 |
*/ |
720 |
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
721 |
{ |
722 |
|
723 |
return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); |
724 |
|
725 |
} |
726 |
|
727 |
/* |
728 |
------------------------------------------------------------------------------- |
729 |
Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is |
730 |
not equal to the 128-bit value formed by concatenating `b0' and `b1'. |
731 |
Otherwise, returns 0. |
732 |
------------------------------------------------------------------------------- |
733 |
*/ |
734 |
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
735 |
{ |
736 |
|
737 |
return ( a0 != b0 ) || ( a1 != b1 ); |
738 |
|
739 |
} |
740 |
|