Statistics
| Branch: | Revision:

root / fpu / softfloat-macros.h @ 8d725fac

History | View | Annotate | Download (23.9 kB)

1
/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/
37

    
38
/*----------------------------------------------------------------------------
39
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
40
| bits are shifted off, they are ``jammed'' into the least significant bit of
41
| the result by setting the least significant bit to 1.  The value of `count'
42
| can be arbitrarily large; in particular, if `count' is greater than 32, the
43
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44
| The result is stored in the location pointed to by `zPtr'.
45
*----------------------------------------------------------------------------*/
46

    
47
INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
48
{
49
    bits32 z;
50

    
51
    if ( count == 0 ) {
52
        z = a;
53
    }
54
    else if ( count < 32 ) {
55
        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
56
    }
57
    else {
58
        z = ( a != 0 );
59
    }
60
    *zPtr = z;
61

    
62
}
63

    
64
/*----------------------------------------------------------------------------
65
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
66
| bits are shifted off, they are ``jammed'' into the least significant bit of
67
| the result by setting the least significant bit to 1.  The value of `count'
68
| can be arbitrarily large; in particular, if `count' is greater than 64, the
69
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70
| The result is stored in the location pointed to by `zPtr'.
71
*----------------------------------------------------------------------------*/
72

    
73
INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
74
{
75
    bits64 z;
76

    
77
    if ( count == 0 ) {
78
        z = a;
79
    }
80
    else if ( count < 64 ) {
81
        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
82
    }
83
    else {
84
        z = ( a != 0 );
85
    }
86
    *zPtr = z;
87

    
88
}
89

    
90
/*----------------------------------------------------------------------------
91
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92
| _plus_ the number of bits given in `count'.  The shifted result is at most
93
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
94
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
95
| shifted off is the most-significant bit of the extra result, and the other
96
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
97
| bits shifted off were all zero.  This extra result is stored in the location
98
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
99
|     (This routine makes more sense if `a0' and `a1' are considered to form
100
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
101
| point value is shifted right by the number of bits given in `count', and
102
| the integer part of the result is returned at the location pointed to by
103
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
104
| described above, and is returned at the location pointed to by `z1Ptr'.)
105
*----------------------------------------------------------------------------*/
106

    
107
INLINE void
108
 shift64ExtraRightJamming(
109
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
110
{
111
    bits64 z0, z1;
112
    int8 negCount = ( - count ) & 63;
113

    
114
    if ( count == 0 ) {
115
        z1 = a1;
116
        z0 = a0;
117
    }
118
    else if ( count < 64 ) {
119
        z1 = ( a0<<negCount ) | ( a1 != 0 );
120
        z0 = a0>>count;
121
    }
122
    else {
123
        if ( count == 64 ) {
124
            z1 = a0 | ( a1 != 0 );
125
        }
126
        else {
127
            z1 = ( ( a0 | a1 ) != 0 );
128
        }
129
        z0 = 0;
130
    }
131
    *z1Ptr = z1;
132
    *z0Ptr = z0;
133

    
134
}
135

    
136
/*----------------------------------------------------------------------------
137
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138
| number of bits given in `count'.  Any bits shifted off are lost.  The value
139
| of `count' can be arbitrarily large; in particular, if `count' is greater
140
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
141
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142
*----------------------------------------------------------------------------*/
143

    
144
INLINE void
145
 shift128Right(
146
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
147
{
148
    bits64 z0, z1;
149
    int8 negCount = ( - count ) & 63;
150

    
151
    if ( count == 0 ) {
152
        z1 = a1;
153
        z0 = a0;
154
    }
155
    else if ( count < 64 ) {
156
        z1 = ( a0<<negCount ) | ( a1>>count );
157
        z0 = a0>>count;
158
    }
159
    else {
160
        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
161
        z0 = 0;
162
    }
163
    *z1Ptr = z1;
164
    *z0Ptr = z0;
165

    
166
}
167

    
168
/*----------------------------------------------------------------------------
169
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170
| number of bits given in `count'.  If any nonzero bits are shifted off, they
171
| are ``jammed'' into the least significant bit of the result by setting the
172
| least significant bit to 1.  The value of `count' can be arbitrarily large;
173
| in particular, if `count' is greater than 128, the result will be either
174
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175
| nonzero.  The result is broken into two 64-bit pieces which are stored at
176
| the locations pointed to by `z0Ptr' and `z1Ptr'.
177
*----------------------------------------------------------------------------*/
178

    
179
INLINE void
180
 shift128RightJamming(
181
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182
{
183
    bits64 z0, z1;
184
    int8 negCount = ( - count ) & 63;
185

    
186
    if ( count == 0 ) {
187
        z1 = a1;
188
        z0 = a0;
189
    }
190
    else if ( count < 64 ) {
191
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192
        z0 = a0>>count;
193
    }
194
    else {
195
        if ( count == 64 ) {
196
            z1 = a0 | ( a1 != 0 );
197
        }
198
        else if ( count < 128 ) {
199
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200
        }
201
        else {
202
            z1 = ( ( a0 | a1 ) != 0 );
203
        }
204
        z0 = 0;
205
    }
206
    *z1Ptr = z1;
207
    *z0Ptr = z0;
208

    
209
}
210

    
211
/*----------------------------------------------------------------------------
212
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
213
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
214
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
215
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
216
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
217
| the most-significant bit of the extra result, and the other 63 bits of the
218
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
219
| were all zero.  This extra result is stored in the location pointed to by
220
| `z2Ptr'.  The value of `count' can be arbitrarily large.
221
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
222
| to form a fixed-point value with binary point between `a1' and `a2'.  This
223
| fixed-point value is shifted right by the number of bits given in `count',
224
| and the integer part of the result is returned at the locations pointed to
225
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
226
| corrupted as described above, and is returned at the location pointed to by
227
| `z2Ptr'.)
228
*----------------------------------------------------------------------------*/
229

    
230
INLINE void
231
 shift128ExtraRightJamming(
232
     bits64 a0,
233
     bits64 a1,
234
     bits64 a2,
235
     int16 count,
236
     bits64 *z0Ptr,
237
     bits64 *z1Ptr,
238
     bits64 *z2Ptr
239
 )
240
{
241
    bits64 z0, z1, z2;
242
    int8 negCount = ( - count ) & 63;
243

    
244
    if ( count == 0 ) {
245
        z2 = a2;
246
        z1 = a1;
247
        z0 = a0;
248
    }
249
    else {
250
        if ( count < 64 ) {
251
            z2 = a1<<negCount;
252
            z1 = ( a0<<negCount ) | ( a1>>count );
253
            z0 = a0>>count;
254
        }
255
        else {
256
            if ( count == 64 ) {
257
                z2 = a1;
258
                z1 = a0;
259
            }
260
            else {
261
                a2 |= a1;
262
                if ( count < 128 ) {
263
                    z2 = a0<<negCount;
264
                    z1 = a0>>( count & 63 );
265
                }
266
                else {
267
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
268
                    z1 = 0;
269
                }
270
            }
271
            z0 = 0;
272
        }
273
        z2 |= ( a2 != 0 );
274
    }
275
    *z2Ptr = z2;
276
    *z1Ptr = z1;
277
    *z0Ptr = z0;
278

    
279
}
280

    
281
/*----------------------------------------------------------------------------
282
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283
| number of bits given in `count'.  Any bits shifted off are lost.  The value
284
| of `count' must be less than 64.  The result is broken into two 64-bit
285
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286
*----------------------------------------------------------------------------*/
287

    
288
INLINE void
289
 shortShift128Left(
290
     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291
{
292

    
293
    *z1Ptr = a1<<count;
294
    *z0Ptr =
295
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296

    
297
}
298

    
299
/*----------------------------------------------------------------------------
300
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
301
| by the number of bits given in `count'.  Any bits shifted off are lost.
302
| The value of `count' must be less than 64.  The result is broken into three
303
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
304
| `z1Ptr', and `z2Ptr'.
305
*----------------------------------------------------------------------------*/
306

    
307
INLINE void
308
 shortShift192Left(
309
     bits64 a0,
310
     bits64 a1,
311
     bits64 a2,
312
     int16 count,
313
     bits64 *z0Ptr,
314
     bits64 *z1Ptr,
315
     bits64 *z2Ptr
316
 )
317
{
318
    bits64 z0, z1, z2;
319
    int8 negCount;
320

    
321
    z2 = a2<<count;
322
    z1 = a1<<count;
323
    z0 = a0<<count;
324
    if ( 0 < count ) {
325
        negCount = ( ( - count ) & 63 );
326
        z1 |= a2>>negCount;
327
        z0 |= a1>>negCount;
328
    }
329
    *z2Ptr = z2;
330
    *z1Ptr = z1;
331
    *z0Ptr = z0;
332

    
333
}
334

    
335
/*----------------------------------------------------------------------------
336
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
337
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
338
| any carry out is lost.  The result is broken into two 64-bit pieces which
339
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
340
*----------------------------------------------------------------------------*/
341

    
342
INLINE void
343
 add128(
344
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
345
{
346
    bits64 z1;
347

    
348
    z1 = a1 + b1;
349
    *z1Ptr = z1;
350
    *z0Ptr = a0 + b0 + ( z1 < a1 );
351

    
352
}
353

    
354
/*----------------------------------------------------------------------------
355
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
356
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
357
| modulo 2^192, so any carry out is lost.  The result is broken into three
358
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
359
| `z1Ptr', and `z2Ptr'.
360
*----------------------------------------------------------------------------*/
361

    
362
INLINE void
363
 add192(
364
     bits64 a0,
365
     bits64 a1,
366
     bits64 a2,
367
     bits64 b0,
368
     bits64 b1,
369
     bits64 b2,
370
     bits64 *z0Ptr,
371
     bits64 *z1Ptr,
372
     bits64 *z2Ptr
373
 )
374
{
375
    bits64 z0, z1, z2;
376
    int8 carry0, carry1;
377

    
378
    z2 = a2 + b2;
379
    carry1 = ( z2 < a2 );
380
    z1 = a1 + b1;
381
    carry0 = ( z1 < a1 );
382
    z0 = a0 + b0;
383
    z1 += carry1;
384
    z0 += ( z1 < carry1 );
385
    z0 += carry0;
386
    *z2Ptr = z2;
387
    *z1Ptr = z1;
388
    *z0Ptr = z0;
389

    
390
}
391

    
392
/*----------------------------------------------------------------------------
393
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
394
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
395
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
396
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
397
| `z1Ptr'.
398
*----------------------------------------------------------------------------*/
399

    
400
INLINE void
401
 sub128(
402
     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
403
{
404

    
405
    *z1Ptr = a1 - b1;
406
    *z0Ptr = a0 - b0 - ( a1 < b1 );
407

    
408
}
409

    
410
/*----------------------------------------------------------------------------
411
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
412
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
413
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
414
| result is broken into three 64-bit pieces which are stored at the locations
415
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
416
*----------------------------------------------------------------------------*/
417

    
418
INLINE void
419
 sub192(
420
     bits64 a0,
421
     bits64 a1,
422
     bits64 a2,
423
     bits64 b0,
424
     bits64 b1,
425
     bits64 b2,
426
     bits64 *z0Ptr,
427
     bits64 *z1Ptr,
428
     bits64 *z2Ptr
429
 )
430
{
431
    bits64 z0, z1, z2;
432
    int8 borrow0, borrow1;
433

    
434
    z2 = a2 - b2;
435
    borrow1 = ( a2 < b2 );
436
    z1 = a1 - b1;
437
    borrow0 = ( a1 < b1 );
438
    z0 = a0 - b0;
439
    z0 -= ( z1 < borrow1 );
440
    z1 -= borrow1;
441
    z0 -= borrow0;
442
    *z2Ptr = z2;
443
    *z1Ptr = z1;
444
    *z0Ptr = z0;
445

    
446
}
447

    
448
/*----------------------------------------------------------------------------
449
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
450
| into two 64-bit pieces which are stored at the locations pointed to by
451
| `z0Ptr' and `z1Ptr'.
452
*----------------------------------------------------------------------------*/
453

    
454
INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
455
{
456
    bits32 aHigh, aLow, bHigh, bLow;
457
    bits64 z0, zMiddleA, zMiddleB, z1;
458

    
459
    aLow = a;
460
    aHigh = a>>32;
461
    bLow = b;
462
    bHigh = b>>32;
463
    z1 = ( (bits64) aLow ) * bLow;
464
    zMiddleA = ( (bits64) aLow ) * bHigh;
465
    zMiddleB = ( (bits64) aHigh ) * bLow;
466
    z0 = ( (bits64) aHigh ) * bHigh;
467
    zMiddleA += zMiddleB;
468
    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
469
    zMiddleA <<= 32;
470
    z1 += zMiddleA;
471
    z0 += ( z1 < zMiddleA );
472
    *z1Ptr = z1;
473
    *z0Ptr = z0;
474

    
475
}
476

    
477
/*----------------------------------------------------------------------------
478
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
479
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
480
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
481
| `z2Ptr'.
482
*----------------------------------------------------------------------------*/
483

    
484
INLINE void
485
 mul128By64To192(
486
     bits64 a0,
487
     bits64 a1,
488
     bits64 b,
489
     bits64 *z0Ptr,
490
     bits64 *z1Ptr,
491
     bits64 *z2Ptr
492
 )
493
{
494
    bits64 z0, z1, z2, more1;
495

    
496
    mul64To128( a1, b, &z1, &z2 );
497
    mul64To128( a0, b, &z0, &more1 );
498
    add128( z0, more1, 0, z1, &z0, &z1 );
499
    *z2Ptr = z2;
500
    *z1Ptr = z1;
501
    *z0Ptr = z0;
502

    
503
}
504

    
505
/*----------------------------------------------------------------------------
506
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
507
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
508
| product.  The product is broken into four 64-bit pieces which are stored at
509
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
510
*----------------------------------------------------------------------------*/
511

    
512
INLINE void
513
 mul128To256(
514
     bits64 a0,
515
     bits64 a1,
516
     bits64 b0,
517
     bits64 b1,
518
     bits64 *z0Ptr,
519
     bits64 *z1Ptr,
520
     bits64 *z2Ptr,
521
     bits64 *z3Ptr
522
 )
523
{
524
    bits64 z0, z1, z2, z3;
525
    bits64 more1, more2;
526

    
527
    mul64To128( a1, b1, &z2, &z3 );
528
    mul64To128( a1, b0, &z1, &more2 );
529
    add128( z1, more2, 0, z2, &z1, &z2 );
530
    mul64To128( a0, b0, &z0, &more1 );
531
    add128( z0, more1, 0, z1, &z0, &z1 );
532
    mul64To128( a0, b1, &more1, &more2 );
533
    add128( more1, more2, 0, z2, &more1, &z2 );
534
    add128( z0, z1, 0, more1, &z0, &z1 );
535
    *z3Ptr = z3;
536
    *z2Ptr = z2;
537
    *z1Ptr = z1;
538
    *z0Ptr = z0;
539

    
540
}
541

    
542
/*----------------------------------------------------------------------------
543
| Returns an approximation to the 64-bit integer quotient obtained by dividing
544
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
545
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
546
| toward zero, the approximation returned lies between q and q + 2 inclusive.
547
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
548
| unsigned integer is returned.
549
*----------------------------------------------------------------------------*/
550

    
551
static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
552
{
553
    bits64 b0, b1;
554
    bits64 rem0, rem1, term0, term1;
555
    bits64 z;
556

    
557
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
558
    b0 = b>>32;
559
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
560
    mul64To128( b, z, &term0, &term1 );
561
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
562
    while ( ( (sbits64) rem0 ) < 0 ) {
563
        z -= LIT64( 0x100000000 );
564
        b1 = b<<32;
565
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
566
    }
567
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
568
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
569
    return z;
570

    
571
}
572

    
573
/*----------------------------------------------------------------------------
574
| Returns an approximation to the square root of the 32-bit significand given
575
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
576
| `aExp' (the least significant bit) is 1, the integer returned approximates
577
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
578
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
579
| case, the approximation returned lies strictly within +/-2 of the exact
580
| value.
581
*----------------------------------------------------------------------------*/
582

    
583
static bits32 estimateSqrt32( int16 aExp, bits32 a )
584
{
585
    static const bits16 sqrtOddAdjustments[] = {
586
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
587
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
588
    };
589
    static const bits16 sqrtEvenAdjustments[] = {
590
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
591
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
592
    };
593
    int8 index;
594
    bits32 z;
595

    
596
    index = ( a>>27 ) & 15;
597
    if ( aExp & 1 ) {
598
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
599
        z = ( ( a / z )<<14 ) + ( z<<15 );
600
        a >>= 1;
601
    }
602
    else {
603
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
604
        z = a / z + z;
605
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
606
        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
607
    }
608
    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
609

    
610
}
611

    
612
/*----------------------------------------------------------------------------
613
| Returns the number of leading 0 bits before the most-significant 1 bit of
614
| `a'.  If `a' is zero, 32 is returned.
615
*----------------------------------------------------------------------------*/
616

    
617
static int8 countLeadingZeros32( bits32 a )
618
{
619
    static const int8 countLeadingZerosHigh[] = {
620
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
621
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
622
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
623
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
624
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
625
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
626
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
627
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
628
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
636
    };
637
    int8 shiftCount;
638

    
639
    shiftCount = 0;
640
    if ( a < 0x10000 ) {
641
        shiftCount += 16;
642
        a <<= 16;
643
    }
644
    if ( a < 0x1000000 ) {
645
        shiftCount += 8;
646
        a <<= 8;
647
    }
648
    shiftCount += countLeadingZerosHigh[ a>>24 ];
649
    return shiftCount;
650

    
651
}
652

    
653
/*----------------------------------------------------------------------------
654
| Returns the number of leading 0 bits before the most-significant 1 bit of
655
| `a'.  If `a' is zero, 64 is returned.
656
*----------------------------------------------------------------------------*/
657

    
658
static int8 countLeadingZeros64( bits64 a )
659
{
660
    int8 shiftCount;
661

    
662
    shiftCount = 0;
663
    if ( a < ( (bits64) 1 )<<32 ) {
664
        shiftCount += 32;
665
    }
666
    else {
667
        a >>= 32;
668
    }
669
    shiftCount += countLeadingZeros32( a );
670
    return shiftCount;
671

    
672
}
673

    
674
/*----------------------------------------------------------------------------
675
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
676
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
677
| Otherwise, returns 0.
678
*----------------------------------------------------------------------------*/
679

    
680
INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
681
{
682

    
683
    return ( a0 == b0 ) && ( a1 == b1 );
684

    
685
}
686

    
687
/*----------------------------------------------------------------------------
688
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
689
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
690
| Otherwise, returns 0.
691
*----------------------------------------------------------------------------*/
692

    
693
INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
694
{
695

    
696
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
697

    
698
}
699

    
700
/*----------------------------------------------------------------------------
701
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
702
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
703
| returns 0.
704
*----------------------------------------------------------------------------*/
705

    
706
INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707
{
708

    
709
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
710

    
711
}
712

    
713
/*----------------------------------------------------------------------------
714
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
715
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
716
| Otherwise, returns 0.
717
*----------------------------------------------------------------------------*/
718

    
719
INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
720
{
721

    
722
    return ( a0 != b0 ) || ( a1 != b1 );
723

    
724
}