root / fpu / softfloatmacros.h @ 3f4cb3d3
History  View  Annotate  Download (23.8 kB)
1  

2 
/*============================================================================

3 

4 
This C source fragment is part of the SoftFloat IEC/IEEE Floatingpoint

5 
Arithmetic Package, Release 2b.

6 

7 
Written by John R. Hauser. This work was made possible in part by the

8 
International Computer Science Institute, located at Suite 600, 1947 Center

9 
Street, Berkeley, California 94704. Funding was partially provided by the

10 
National Science Foundation under grant MIP9311980. The original version

11 
of this code was written as part of a project to build a fixedpoint vector

12 
processor in collaboration with the University of California at Berkeley,

13 
overseen by Profs. Nelson Morgan and John Wawrzynek. More information

14 
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/

15 
arithmetic/SoftFloat.html'.

16 

17 
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has

18 
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES

19 
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS

20 
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,

21 
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE

22 
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE

23 
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR

24 
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

25 

26 
Derivative works are acceptable, even for commercial purposes, so long as

27 
(1) the source code for the derivative work includes prominent notice that

28 
the work is derivative, and (2) the source code includes prominent notice with

29 
these four paragraphs for those parts of this code that are retained.

30 

31 
=============================================================================*/

32  
33 
/*

34 
 Shifts `a' right by the number of bits given in `count'. If any nonzero

35 
 bits are shifted off, they are ``jammed'' into the least significant bit of

36 
 the result by setting the least significant bit to 1. The value of `count'

37 
 can be arbitrarily large; in particular, if `count' is greater than 32, the

38 
 result will be either 0 or 1, depending on whether `a' is zero or nonzero.

39 
 The result is stored in the location pointed to by `zPtr'.

40 
**/

41  
42 
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )

43 
{ 
44 
bits32 z; 
45  
46 
if ( count == 0 ) { 
47 
z = a; 
48 
} 
49 
else if ( count < 32 ) { 
50 
z = ( a>>count )  ( ( a<<( (  count ) & 31 ) ) != 0 ); 
51 
} 
52 
else {

53 
z = ( a != 0 );

54 
} 
55 
*zPtr = z; 
56  
57 
} 
58  
59 
/*

60 
 Shifts `a' right by the number of bits given in `count'. If any nonzero

61 
 bits are shifted off, they are ``jammed'' into the least significant bit of

62 
 the result by setting the least significant bit to 1. The value of `count'

63 
 can be arbitrarily large; in particular, if `count' is greater than 64, the

64 
 result will be either 0 or 1, depending on whether `a' is zero or nonzero.

65 
 The result is stored in the location pointed to by `zPtr'.

66 
**/

67  
68 
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )

69 
{ 
70 
bits64 z; 
71  
72 
if ( count == 0 ) { 
73 
z = a; 
74 
} 
75 
else if ( count < 64 ) { 
76 
z = ( a>>count )  ( ( a<<( (  count ) & 63 ) ) != 0 ); 
77 
} 
78 
else {

79 
z = ( a != 0 );

80 
} 
81 
*zPtr = z; 
82  
83 
} 
84  
85 
/*

86 
 Shifts the 128bit value formed by concatenating `a0' and `a1' right by 64

87 
 _plus_ the number of bits given in `count'. The shifted result is at most

88 
 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The

89 
 bits shifted off form a second 64bit result as follows: The _last_ bit

90 
 shifted off is the mostsignificant bit of the extra result, and the other

91 
 63 bits of the extra result are all zero if and only if _all_but_the_last_

92 
 bits shifted off were all zero. This extra result is stored in the location

93 
 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.

94 
 (This routine makes more sense if `a0' and `a1' are considered to form

95 
 a fixedpoint value with binary point between `a0' and `a1'. This fixed

96 
 point value is shifted right by the number of bits given in `count', and

97 
 the integer part of the result is returned at the location pointed to by

98 
 `z0Ptr'. The fractional part of the result may be slightly corrupted as

99 
 described above, and is returned at the location pointed to by `z1Ptr'.)

100 
**/

101  
102 
INLINE void

103 
shift64ExtraRightJamming( 
104 
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 
105 
{ 
106 
bits64 z0, z1; 
107 
int8 negCount = (  count ) & 63;

108  
109 
if ( count == 0 ) { 
110 
z1 = a1; 
111 
z0 = a0; 
112 
} 
113 
else if ( count < 64 ) { 
114 
z1 = ( a0<<negCount )  ( a1 != 0 );

115 
z0 = a0>>count; 
116 
} 
117 
else {

118 
if ( count == 64 ) { 
119 
z1 = a0  ( a1 != 0 );

120 
} 
121 
else {

122 
z1 = ( ( a0  a1 ) != 0 );

123 
} 
124 
z0 = 0;

125 
} 
126 
*z1Ptr = z1; 
127 
*z0Ptr = z0; 
128  
129 
} 
130  
131 
/*

132 
 Shifts the 128bit value formed by concatenating `a0' and `a1' right by the

133 
 number of bits given in `count'. Any bits shifted off are lost. The value

134 
 of `count' can be arbitrarily large; in particular, if `count' is greater

135 
 than 128, the result will be 0. The result is broken into two 64bit pieces

136 
 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

137 
**/

138  
139 
INLINE void

140 
shift128Right( 
141 
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 
142 
{ 
143 
bits64 z0, z1; 
144 
int8 negCount = (  count ) & 63;

145  
146 
if ( count == 0 ) { 
147 
z1 = a1; 
148 
z0 = a0; 
149 
} 
150 
else if ( count < 64 ) { 
151 
z1 = ( a0<<negCount )  ( a1>>count ); 
152 
z0 = a0>>count; 
153 
} 
154 
else {

155 
z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 
156 
z0 = 0;

157 
} 
158 
*z1Ptr = z1; 
159 
*z0Ptr = z0; 
160  
161 
} 
162  
163 
/*

164 
 Shifts the 128bit value formed by concatenating `a0' and `a1' right by the

165 
 number of bits given in `count'. If any nonzero bits are shifted off, they

166 
 are ``jammed'' into the least significant bit of the result by setting the

167 
 least significant bit to 1. The value of `count' can be arbitrarily large;

168 
 in particular, if `count' is greater than 128, the result will be either

169 
 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or

170 
 nonzero. The result is broken into two 64bit pieces which are stored at

171 
 the locations pointed to by `z0Ptr' and `z1Ptr'.

172 
**/

173  
174 
INLINE void

175 
shift128RightJamming( 
176 
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 
177 
{ 
178 
bits64 z0, z1; 
179 
int8 negCount = (  count ) & 63;

180  
181 
if ( count == 0 ) { 
182 
z1 = a1; 
183 
z0 = a0; 
184 
} 
185 
else if ( count < 64 ) { 
186 
z1 = ( a0<<negCount )  ( a1>>count )  ( ( a1<<negCount ) != 0 );

187 
z0 = a0>>count; 
188 
} 
189 
else {

190 
if ( count == 64 ) { 
191 
z1 = a0  ( a1 != 0 );

192 
} 
193 
else if ( count < 128 ) { 
194 
z1 = ( a0>>( count & 63 ) )  ( ( ( a0<<negCount )  a1 ) != 0 ); 
195 
} 
196 
else {

197 
z1 = ( ( a0  a1 ) != 0 );

198 
} 
199 
z0 = 0;

200 
} 
201 
*z1Ptr = z1; 
202 
*z0Ptr = z0; 
203  
204 
} 
205  
206 
/*

207 
 Shifts the 192bit value formed by concatenating `a0', `a1', and `a2' right

208 
 by 64 _plus_ the number of bits given in `count'. The shifted result is

209 
 at most 128 nonzero bits; these are broken into two 64bit pieces which are

210 
 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted

211 
 off form a third 64bit result as follows: The _last_ bit shifted off is

212 
 the mostsignificant bit of the extra result, and the other 63 bits of the

213 
 extra result are all zero if and only if _all_but_the_last_ bits shifted off

214 
 were all zero. This extra result is stored in the location pointed to by

215 
 `z2Ptr'. The value of `count' can be arbitrarily large.

216 
 (This routine makes more sense if `a0', `a1', and `a2' are considered

217 
 to form a fixedpoint value with binary point between `a1' and `a2'. This

218 
 fixedpoint value is shifted right by the number of bits given in `count',

219 
 and the integer part of the result is returned at the locations pointed to

220 
 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly

221 
 corrupted as described above, and is returned at the location pointed to by

222 
 `z2Ptr'.)

223 
**/

224  
225 
INLINE void

226 
shift128ExtraRightJamming( 
227 
bits64 a0, 
228 
bits64 a1, 
229 
bits64 a2, 
230 
int16 count, 
231 
bits64 *z0Ptr, 
232 
bits64 *z1Ptr, 
233 
bits64 *z2Ptr 
234 
) 
235 
{ 
236 
bits64 z0, z1, z2; 
237 
int8 negCount = (  count ) & 63;

238  
239 
if ( count == 0 ) { 
240 
z2 = a2; 
241 
z1 = a1; 
242 
z0 = a0; 
243 
} 
244 
else {

245 
if ( count < 64 ) { 
246 
z2 = a1<<negCount; 
247 
z1 = ( a0<<negCount )  ( a1>>count ); 
248 
z0 = a0>>count; 
249 
} 
250 
else {

251 
if ( count == 64 ) { 
252 
z2 = a1; 
253 
z1 = a0; 
254 
} 
255 
else {

256 
a2 = a1; 
257 
if ( count < 128 ) { 
258 
z2 = a0<<negCount; 
259 
z1 = a0>>( count & 63 );

260 
} 
261 
else {

262 
z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 
263 
z1 = 0;

264 
} 
265 
} 
266 
z0 = 0;

267 
} 
268 
z2 = ( a2 != 0 );

269 
} 
270 
*z2Ptr = z2; 
271 
*z1Ptr = z1; 
272 
*z0Ptr = z0; 
273  
274 
} 
275  
276 
/*

277 
 Shifts the 128bit value formed by concatenating `a0' and `a1' left by the

278 
 number of bits given in `count'. Any bits shifted off are lost. The value

279 
 of `count' must be less than 64. The result is broken into two 64bit

280 
 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

281 
**/

282  
283 
INLINE void

284 
shortShift128Left( 
285 
bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 
286 
{ 
287  
288 
*z1Ptr = a1<<count; 
289 
*z0Ptr = 
290 
( count == 0 ) ? a0 : ( a0<<count )  ( a1>>( (  count ) & 63 ) ); 
291  
292 
} 
293  
294 
/*

295 
 Shifts the 192bit value formed by concatenating `a0', `a1', and `a2' left

296 
 by the number of bits given in `count'. Any bits shifted off are lost.

297 
 The value of `count' must be less than 64. The result is broken into three

298 
 64bit pieces which are stored at the locations pointed to by `z0Ptr',

299 
 `z1Ptr', and `z2Ptr'.

300 
**/

301  
302 
INLINE void

303 
shortShift192Left( 
304 
bits64 a0, 
305 
bits64 a1, 
306 
bits64 a2, 
307 
int16 count, 
308 
bits64 *z0Ptr, 
309 
bits64 *z1Ptr, 
310 
bits64 *z2Ptr 
311 
) 
312 
{ 
313 
bits64 z0, z1, z2; 
314 
int8 negCount; 
315  
316 
z2 = a2<<count; 
317 
z1 = a1<<count; 
318 
z0 = a0<<count; 
319 
if ( 0 < count ) { 
320 
negCount = ( (  count ) & 63 );

321 
z1 = a2>>negCount; 
322 
z0 = a1>>negCount; 
323 
} 
324 
*z2Ptr = z2; 
325 
*z1Ptr = z1; 
326 
*z0Ptr = z0; 
327  
328 
} 
329  
330 
/*

331 
 Adds the 128bit value formed by concatenating `a0' and `a1' to the 128bit

332 
 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so

333 
 any carry out is lost. The result is broken into two 64bit pieces which

334 
 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

335 
**/

336  
337 
INLINE void

338 
add128( 
339 
bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 
340 
{ 
341 
bits64 z1; 
342  
343 
z1 = a1 + b1; 
344 
*z1Ptr = z1; 
345 
*z0Ptr = a0 + b0 + ( z1 < a1 ); 
346  
347 
} 
348  
349 
/*

350 
 Adds the 192bit value formed by concatenating `a0', `a1', and `a2' to the

351 
 192bit value formed by concatenating `b0', `b1', and `b2'. Addition is

352 
 modulo 2^192, so any carry out is lost. The result is broken into three

353 
 64bit pieces which are stored at the locations pointed to by `z0Ptr',

354 
 `z1Ptr', and `z2Ptr'.

355 
**/

356  
357 
INLINE void

358 
add192( 
359 
bits64 a0, 
360 
bits64 a1, 
361 
bits64 a2, 
362 
bits64 b0, 
363 
bits64 b1, 
364 
bits64 b2, 
365 
bits64 *z0Ptr, 
366 
bits64 *z1Ptr, 
367 
bits64 *z2Ptr 
368 
) 
369 
{ 
370 
bits64 z0, z1, z2; 
371 
int8 carry0, carry1; 
372  
373 
z2 = a2 + b2; 
374 
carry1 = ( z2 < a2 ); 
375 
z1 = a1 + b1; 
376 
carry0 = ( z1 < a1 ); 
377 
z0 = a0 + b0; 
378 
z1 += carry1; 
379 
z0 += ( z1 < carry1 ); 
380 
z0 += carry0; 
381 
*z2Ptr = z2; 
382 
*z1Ptr = z1; 
383 
*z0Ptr = z0; 
384  
385 
} 
386  
387 
/*

388 
 Subtracts the 128bit value formed by concatenating `b0' and `b1' from the

389 
 128bit value formed by concatenating `a0' and `a1'. Subtraction is modulo

390 
 2^128, so any borrow out (carry out) is lost. The result is broken into two

391 
 64bit pieces which are stored at the locations pointed to by `z0Ptr' and

392 
 `z1Ptr'.

393 
**/

394  
395 
INLINE void

396 
sub128( 
397 
bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 
398 
{ 
399  
400 
*z1Ptr = a1  b1; 
401 
*z0Ptr = a0  b0  ( a1 < b1 ); 
402  
403 
} 
404  
405 
/*

406 
 Subtracts the 192bit value formed by concatenating `b0', `b1', and `b2'

407 
 from the 192bit value formed by concatenating `a0', `a1', and `a2'.

408 
 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The

409 
 result is broken into three 64bit pieces which are stored at the locations

410 
 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.

411 
**/

412  
413 
INLINE void

414 
sub192( 
415 
bits64 a0, 
416 
bits64 a1, 
417 
bits64 a2, 
418 
bits64 b0, 
419 
bits64 b1, 
420 
bits64 b2, 
421 
bits64 *z0Ptr, 
422 
bits64 *z1Ptr, 
423 
bits64 *z2Ptr 
424 
) 
425 
{ 
426 
bits64 z0, z1, z2; 
427 
int8 borrow0, borrow1; 
428  
429 
z2 = a2  b2; 
430 
borrow1 = ( a2 < b2 ); 
431 
z1 = a1  b1; 
432 
borrow0 = ( a1 < b1 ); 
433 
z0 = a0  b0; 
434 
z0 = ( z1 < borrow1 ); 
435 
z1 = borrow1; 
436 
z0 = borrow0; 
437 
*z2Ptr = z2; 
438 
*z1Ptr = z1; 
439 
*z0Ptr = z0; 
440  
441 
} 
442  
443 
/*

444 
 Multiplies `a' by `b' to obtain a 128bit product. The product is broken

445 
 into two 64bit pieces which are stored at the locations pointed to by

446 
 `z0Ptr' and `z1Ptr'.

447 
**/

448  
449 
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )

450 
{ 
451 
bits32 aHigh, aLow, bHigh, bLow; 
452 
bits64 z0, zMiddleA, zMiddleB, z1; 
453  
454 
aLow = a; 
455 
aHigh = a>>32;

456 
bLow = b; 
457 
bHigh = b>>32;

458 
z1 = ( (bits64) aLow ) * bLow; 
459 
zMiddleA = ( (bits64) aLow ) * bHigh; 
460 
zMiddleB = ( (bits64) aHigh ) * bLow; 
461 
z0 = ( (bits64) aHigh ) * bHigh; 
462 
zMiddleA += zMiddleB; 
463 
z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 
464 
zMiddleA <<= 32;

465 
z1 += zMiddleA; 
466 
z0 += ( z1 < zMiddleA ); 
467 
*z1Ptr = z1; 
468 
*z0Ptr = z0; 
469  
470 
} 
471  
472 
/*

473 
 Multiplies the 128bit value formed by concatenating `a0' and `a1' by

474 
 `b' to obtain a 192bit product. The product is broken into three 64bit

475 
 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and

476 
 `z2Ptr'.

477 
**/

478  
479 
INLINE void

480 
mul128By64To192( 
481 
bits64 a0, 
482 
bits64 a1, 
483 
bits64 b, 
484 
bits64 *z0Ptr, 
485 
bits64 *z1Ptr, 
486 
bits64 *z2Ptr 
487 
) 
488 
{ 
489 
bits64 z0, z1, z2, more1; 
490  
491 
mul64To128( a1, b, &z1, &z2 ); 
492 
mul64To128( a0, b, &z0, &more1 ); 
493 
add128( z0, more1, 0, z1, &z0, &z1 );

494 
*z2Ptr = z2; 
495 
*z1Ptr = z1; 
496 
*z0Ptr = z0; 
497  
498 
} 
499  
500 
/*

501 
 Multiplies the 128bit value formed by concatenating `a0' and `a1' to the

502 
 128bit value formed by concatenating `b0' and `b1' to obtain a 256bit

503 
 product. The product is broken into four 64bit pieces which are stored at

504 
 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.

505 
**/

506  
507 
INLINE void

508 
mul128To256( 
509 
bits64 a0, 
510 
bits64 a1, 
511 
bits64 b0, 
512 
bits64 b1, 
513 
bits64 *z0Ptr, 
514 
bits64 *z1Ptr, 
515 
bits64 *z2Ptr, 
516 
bits64 *z3Ptr 
517 
) 
518 
{ 
519 
bits64 z0, z1, z2, z3; 
520 
bits64 more1, more2; 
521  
522 
mul64To128( a1, b1, &z2, &z3 ); 
523 
mul64To128( a1, b0, &z1, &more2 ); 
524 
add128( z1, more2, 0, z2, &z1, &z2 );

525 
mul64To128( a0, b0, &z0, &more1 ); 
526 
add128( z0, more1, 0, z1, &z0, &z1 );

527 
mul64To128( a0, b1, &more1, &more2 ); 
528 
add128( more1, more2, 0, z2, &more1, &z2 );

529 
add128( z0, z1, 0, more1, &z0, &z1 );

530 
*z3Ptr = z3; 
531 
*z2Ptr = z2; 
532 
*z1Ptr = z1; 
533 
*z0Ptr = z0; 
534  
535 
} 
536  
537 
/*

538 
 Returns an approximation to the 64bit integer quotient obtained by dividing

539 
 `b' into the 128bit value formed by concatenating `a0' and `a1'. The

540 
 divisor `b' must be at least 2^63. If q is the exact quotient truncated

541 
 toward zero, the approximation returned lies between q and q + 2 inclusive.

542 
 If the exact quotient q is larger than 64 bits, the maximum positive 64bit

543 
 unsigned integer is returned.

544 
**/

545  
546 
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )

547 
{ 
548 
bits64 b0, b1; 
549 
bits64 rem0, rem1, term0, term1; 
550 
bits64 z; 
551  
552 
if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 
553 
b0 = b>>32;

554 
z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 
555 
mul64To128( b, z, &term0, &term1 ); 
556 
sub128( a0, a1, term0, term1, &rem0, &rem1 ); 
557 
while ( ( (sbits64) rem0 ) < 0 ) { 
558 
z = LIT64( 0x100000000 );

559 
b1 = b<<32;

560 
add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 
561 
} 
562 
rem0 = ( rem0<<32 )  ( rem1>>32 ); 
563 
z = ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 
564 
return z;

565  
566 
} 
567  
568 
/*

569 
 Returns an approximation to the square root of the 32bit significand given

570 
 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of

571 
 `aExp' (the least significant bit) is 1, the integer returned approximates

572 
 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'

573 
 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either

574 
 case, the approximation returned lies strictly within +/2 of the exact

575 
 value.

576 
**/

577  
578 
static bits32 estimateSqrt32( int16 aExp, bits32 a )

579 
{ 
580 
static const bits16 sqrtOddAdjustments[] = { 
581 
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 
582 
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 
583 
}; 
584 
static const bits16 sqrtEvenAdjustments[] = { 
585 
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 
586 
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 
587 
}; 
588 
int8 index; 
589 
bits32 z; 
590  
591 
index = ( a>>27 ) & 15; 
592 
if ( aExp & 1 ) { 
593 
z = 0x4000 + ( a>>17 )  sqrtOddAdjustments[ (int)index ]; 
594 
z = ( ( a / z )<<14 ) + ( z<<15 ); 
595 
a >>= 1;

596 
} 
597 
else {

598 
z = 0x8000 + ( a>>17 )  sqrtEvenAdjustments[ (int)index ]; 
599 
z = a / z + z; 
600 
z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 
601 
if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 
602 
} 
603 
return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 
604  
605 
} 
606  
607 
/*

608 
 Returns the number of leading 0 bits before the mostsignificant 1 bit of

609 
 `a'. If `a' is zero, 32 is returned.

610 
**/

611  
612 
static int8 countLeadingZeros32( bits32 a )

613 
{ 
614 
static const int8 countLeadingZerosHigh[] = { 
615 
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 
616 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
617 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
618 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
619 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
620 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
621 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
622 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
623 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
624 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
625 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
626 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
627 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
628 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
629 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
630 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
631 
}; 
632 
int8 shiftCount; 
633  
634 
shiftCount = 0;

635 
if ( a < 0x10000 ) { 
636 
shiftCount += 16;

637 
a <<= 16;

638 
} 
639 
if ( a < 0x1000000 ) { 
640 
shiftCount += 8;

641 
a <<= 8;

642 
} 
643 
shiftCount += countLeadingZerosHigh[ a>>24 ];

644 
return shiftCount;

645  
646 
} 
647  
648 
/*

649 
 Returns the number of leading 0 bits before the mostsignificant 1 bit of

650 
 `a'. If `a' is zero, 64 is returned.

651 
**/

652  
653 
static int8 countLeadingZeros64( bits64 a )

654 
{ 
655 
int8 shiftCount; 
656  
657 
shiftCount = 0;

658 
if ( a < ( (bits64) 1 )<<32 ) { 
659 
shiftCount += 32;

660 
} 
661 
else {

662 
a >>= 32;

663 
} 
664 
shiftCount += countLeadingZeros32( a ); 
665 
return shiftCount;

666  
667 
} 
668  
669 
/*

670 
 Returns 1 if the 128bit value formed by concatenating `a0' and `a1'

671 
 is equal to the 128bit value formed by concatenating `b0' and `b1'.

672 
 Otherwise, returns 0.

673 
**/

674  
675 
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 
676 
{ 
677  
678 
return ( a0 == b0 ) && ( a1 == b1 );

679  
680 
} 
681  
682 
/*

683 
 Returns 1 if the 128bit value formed by concatenating `a0' and `a1' is less

684 
 than or equal to the 128bit value formed by concatenating `b0' and `b1'.

685 
 Otherwise, returns 0.

686 
**/

687  
688 
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 
689 
{ 
690  
691 
return ( a0 < b0 )  ( ( a0 == b0 ) && ( a1 <= b1 ) );

692  
693 
} 
694  
695 
/*

696 
 Returns 1 if the 128bit value formed by concatenating `a0' and `a1' is less

697 
 than the 128bit value formed by concatenating `b0' and `b1'. Otherwise,

698 
 returns 0.

699 
**/

700  
701 
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 
702 
{ 
703  
704 
return ( a0 < b0 )  ( ( a0 == b0 ) && ( a1 < b1 ) );

705  
706 
} 
707  
708 
/*

709 
 Returns 1 if the 128bit value formed by concatenating `a0' and `a1' is

710 
 not equal to the 128bit value formed by concatenating `b0' and `b1'.

711 
 Otherwise, returns 0.

712 
**/

713  
714 
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 
715 
{ 
716  
717 
return ( a0 != b0 )  ( a1 != b1 );

718  
719 
} 