/* Exported from a repository web viewer: fpu/softfloat-macros.h @ a9899996 */

/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/

/*----------------------------------------------------------------------------
| SOFTFLOAT_GNUC_PREREQ(maj, min) is nonzero when the compiler reports a GNU C
| version of at least `maj'.`min'.  When the compiler does not define both
| __GNUC__ and __GNUC_MINOR__, the macro is always 0.
*----------------------------------------------------------------------------*/
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#endif
47

    
48

    
49
/*----------------------------------------------------------------------------
50
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
51
| bits are shifted off, they are ``jammed'' into the least significant bit of
52
| the result by setting the least significant bit to 1.  The value of `count'
53
| can be arbitrarily large; in particular, if `count' is greater than 32, the
54
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
55
| The result is stored in the location pointed to by `zPtr'.
56
*----------------------------------------------------------------------------*/
57

    
58
INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr )
59
{
60
    uint32_t z;
61

    
62
    if ( count == 0 ) {
63
        z = a;
64
    }
65
    else if ( count < 32 ) {
66
        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
67
    }
68
    else {
69
        z = ( a != 0 );
70
    }
71
    *zPtr = z;
72

    
73
}
74

    
75
/*----------------------------------------------------------------------------
76
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
77
| bits are shifted off, they are ``jammed'' into the least significant bit of
78
| the result by setting the least significant bit to 1.  The value of `count'
79
| can be arbitrarily large; in particular, if `count' is greater than 64, the
80
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
81
| The result is stored in the location pointed to by `zPtr'.
82
*----------------------------------------------------------------------------*/
83

    
84
INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr )
85
{
86
    uint64_t z;
87

    
88
    if ( count == 0 ) {
89
        z = a;
90
    }
91
    else if ( count < 64 ) {
92
        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
93
    }
94
    else {
95
        z = ( a != 0 );
96
    }
97
    *zPtr = z;
98

    
99
}
100

    
101
/*----------------------------------------------------------------------------
102
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
103
| _plus_ the number of bits given in `count'.  The shifted result is at most
104
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
105
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
106
| shifted off is the most-significant bit of the extra result, and the other
107
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
108
| bits shifted off were all zero.  This extra result is stored in the location
109
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
110
|     (This routine makes more sense if `a0' and `a1' are considered to form
111
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
112
| point value is shifted right by the number of bits given in `count', and
113
| the integer part of the result is returned at the location pointed to by
114
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
115
| described above, and is returned at the location pointed to by `z1Ptr'.)
116
*----------------------------------------------------------------------------*/
117

    
118
INLINE void
119
 shift64ExtraRightJamming(
120
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
121
{
122
    uint64_t z0, z1;
123
    int8 negCount = ( - count ) & 63;
124

    
125
    if ( count == 0 ) {
126
        z1 = a1;
127
        z0 = a0;
128
    }
129
    else if ( count < 64 ) {
130
        z1 = ( a0<<negCount ) | ( a1 != 0 );
131
        z0 = a0>>count;
132
    }
133
    else {
134
        if ( count == 64 ) {
135
            z1 = a0 | ( a1 != 0 );
136
        }
137
        else {
138
            z1 = ( ( a0 | a1 ) != 0 );
139
        }
140
        z0 = 0;
141
    }
142
    *z1Ptr = z1;
143
    *z0Ptr = z0;
144

    
145
}
146

    
147
/*----------------------------------------------------------------------------
148
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
149
| number of bits given in `count'.  Any bits shifted off are lost.  The value
150
| of `count' can be arbitrarily large; in particular, if `count' is greater
151
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
152
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
153
*----------------------------------------------------------------------------*/
154

    
155
INLINE void
156
 shift128Right(
157
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
158
{
159
    uint64_t z0, z1;
160
    int8 negCount = ( - count ) & 63;
161

    
162
    if ( count == 0 ) {
163
        z1 = a1;
164
        z0 = a0;
165
    }
166
    else if ( count < 64 ) {
167
        z1 = ( a0<<negCount ) | ( a1>>count );
168
        z0 = a0>>count;
169
    }
170
    else {
171
        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
172
        z0 = 0;
173
    }
174
    *z1Ptr = z1;
175
    *z0Ptr = z0;
176

    
177
}
178

    
179
/*----------------------------------------------------------------------------
180
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
181
| number of bits given in `count'.  If any nonzero bits are shifted off, they
182
| are ``jammed'' into the least significant bit of the result by setting the
183
| least significant bit to 1.  The value of `count' can be arbitrarily large;
184
| in particular, if `count' is greater than 128, the result will be either
185
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
186
| nonzero.  The result is broken into two 64-bit pieces which are stored at
187
| the locations pointed to by `z0Ptr' and `z1Ptr'.
188
*----------------------------------------------------------------------------*/
189

    
190
INLINE void
191
 shift128RightJamming(
192
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
193
{
194
    uint64_t z0, z1;
195
    int8 negCount = ( - count ) & 63;
196

    
197
    if ( count == 0 ) {
198
        z1 = a1;
199
        z0 = a0;
200
    }
201
    else if ( count < 64 ) {
202
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
203
        z0 = a0>>count;
204
    }
205
    else {
206
        if ( count == 64 ) {
207
            z1 = a0 | ( a1 != 0 );
208
        }
209
        else if ( count < 128 ) {
210
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
211
        }
212
        else {
213
            z1 = ( ( a0 | a1 ) != 0 );
214
        }
215
        z0 = 0;
216
    }
217
    *z1Ptr = z1;
218
    *z0Ptr = z0;
219

    
220
}
221

    
222
/*----------------------------------------------------------------------------
223
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
224
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
225
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
226
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
227
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
228
| the most-significant bit of the extra result, and the other 63 bits of the
229
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
230
| were all zero.  This extra result is stored in the location pointed to by
231
| `z2Ptr'.  The value of `count' can be arbitrarily large.
232
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
233
| to form a fixed-point value with binary point between `a1' and `a2'.  This
234
| fixed-point value is shifted right by the number of bits given in `count',
235
| and the integer part of the result is returned at the locations pointed to
236
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
237
| corrupted as described above, and is returned at the location pointed to by
238
| `z2Ptr'.)
239
*----------------------------------------------------------------------------*/
240

    
241
INLINE void
242
 shift128ExtraRightJamming(
243
     uint64_t a0,
244
     uint64_t a1,
245
     uint64_t a2,
246
     int16 count,
247
     uint64_t *z0Ptr,
248
     uint64_t *z1Ptr,
249
     uint64_t *z2Ptr
250
 )
251
{
252
    uint64_t z0, z1, z2;
253
    int8 negCount = ( - count ) & 63;
254

    
255
    if ( count == 0 ) {
256
        z2 = a2;
257
        z1 = a1;
258
        z0 = a0;
259
    }
260
    else {
261
        if ( count < 64 ) {
262
            z2 = a1<<negCount;
263
            z1 = ( a0<<negCount ) | ( a1>>count );
264
            z0 = a0>>count;
265
        }
266
        else {
267
            if ( count == 64 ) {
268
                z2 = a1;
269
                z1 = a0;
270
            }
271
            else {
272
                a2 |= a1;
273
                if ( count < 128 ) {
274
                    z2 = a0<<negCount;
275
                    z1 = a0>>( count & 63 );
276
                }
277
                else {
278
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
279
                    z1 = 0;
280
                }
281
            }
282
            z0 = 0;
283
        }
284
        z2 |= ( a2 != 0 );
285
    }
286
    *z2Ptr = z2;
287
    *z1Ptr = z1;
288
    *z0Ptr = z0;
289

    
290
}
291

    
292
/*----------------------------------------------------------------------------
293
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
294
| number of bits given in `count'.  Any bits shifted off are lost.  The value
295
| of `count' must be less than 64.  The result is broken into two 64-bit
296
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
297
*----------------------------------------------------------------------------*/
298

    
299
INLINE void
300
 shortShift128Left(
301
     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
302
{
303

    
304
    *z1Ptr = a1<<count;
305
    *z0Ptr =
306
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
307

    
308
}
309

    
310
/*----------------------------------------------------------------------------
311
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
312
| by the number of bits given in `count'.  Any bits shifted off are lost.
313
| The value of `count' must be less than 64.  The result is broken into three
314
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
315
| `z1Ptr', and `z2Ptr'.
316
*----------------------------------------------------------------------------*/
317

    
318
INLINE void
319
 shortShift192Left(
320
     uint64_t a0,
321
     uint64_t a1,
322
     uint64_t a2,
323
     int16 count,
324
     uint64_t *z0Ptr,
325
     uint64_t *z1Ptr,
326
     uint64_t *z2Ptr
327
 )
328
{
329
    uint64_t z0, z1, z2;
330
    int8 negCount;
331

    
332
    z2 = a2<<count;
333
    z1 = a1<<count;
334
    z0 = a0<<count;
335
    if ( 0 < count ) {
336
        negCount = ( ( - count ) & 63 );
337
        z1 |= a2>>negCount;
338
        z0 |= a1>>negCount;
339
    }
340
    *z2Ptr = z2;
341
    *z1Ptr = z1;
342
    *z0Ptr = z0;
343

    
344
}
345

    
346
/*----------------------------------------------------------------------------
347
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
348
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
349
| any carry out is lost.  The result is broken into two 64-bit pieces which
350
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
351
*----------------------------------------------------------------------------*/
352

    
353
INLINE void
354
 add128(
355
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
356
{
357
    uint64_t z1;
358

    
359
    z1 = a1 + b1;
360
    *z1Ptr = z1;
361
    *z0Ptr = a0 + b0 + ( z1 < a1 );
362

    
363
}
364

    
365
/*----------------------------------------------------------------------------
366
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
367
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
368
| modulo 2^192, so any carry out is lost.  The result is broken into three
369
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
370
| `z1Ptr', and `z2Ptr'.
371
*----------------------------------------------------------------------------*/
372

    
373
INLINE void
374
 add192(
375
     uint64_t a0,
376
     uint64_t a1,
377
     uint64_t a2,
378
     uint64_t b0,
379
     uint64_t b1,
380
     uint64_t b2,
381
     uint64_t *z0Ptr,
382
     uint64_t *z1Ptr,
383
     uint64_t *z2Ptr
384
 )
385
{
386
    uint64_t z0, z1, z2;
387
    int8 carry0, carry1;
388

    
389
    z2 = a2 + b2;
390
    carry1 = ( z2 < a2 );
391
    z1 = a1 + b1;
392
    carry0 = ( z1 < a1 );
393
    z0 = a0 + b0;
394
    z1 += carry1;
395
    z0 += ( z1 < carry1 );
396
    z0 += carry0;
397
    *z2Ptr = z2;
398
    *z1Ptr = z1;
399
    *z0Ptr = z0;
400

    
401
}
402

    
403
/*----------------------------------------------------------------------------
404
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
405
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
406
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
407
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
408
| `z1Ptr'.
409
*----------------------------------------------------------------------------*/
410

    
411
INLINE void
412
 sub128(
413
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
414
{
415

    
416
    *z1Ptr = a1 - b1;
417
    *z0Ptr = a0 - b0 - ( a1 < b1 );
418

    
419
}
420

    
421
/*----------------------------------------------------------------------------
422
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
423
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
424
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
425
| result is broken into three 64-bit pieces which are stored at the locations
426
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
427
*----------------------------------------------------------------------------*/
428

    
429
INLINE void
430
 sub192(
431
     uint64_t a0,
432
     uint64_t a1,
433
     uint64_t a2,
434
     uint64_t b0,
435
     uint64_t b1,
436
     uint64_t b2,
437
     uint64_t *z0Ptr,
438
     uint64_t *z1Ptr,
439
     uint64_t *z2Ptr
440
 )
441
{
442
    uint64_t z0, z1, z2;
443
    int8 borrow0, borrow1;
444

    
445
    z2 = a2 - b2;
446
    borrow1 = ( a2 < b2 );
447
    z1 = a1 - b1;
448
    borrow0 = ( a1 < b1 );
449
    z0 = a0 - b0;
450
    z0 -= ( z1 < borrow1 );
451
    z1 -= borrow1;
452
    z0 -= borrow0;
453
    *z2Ptr = z2;
454
    *z1Ptr = z1;
455
    *z0Ptr = z0;
456

    
457
}
458

    
459
/*----------------------------------------------------------------------------
460
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
461
| into two 64-bit pieces which are stored at the locations pointed to by
462
| `z0Ptr' and `z1Ptr'.
463
*----------------------------------------------------------------------------*/
464

    
465
INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
466
{
467
    uint32_t aHigh, aLow, bHigh, bLow;
468
    uint64_t z0, zMiddleA, zMiddleB, z1;
469

    
470
    aLow = a;
471
    aHigh = a>>32;
472
    bLow = b;
473
    bHigh = b>>32;
474
    z1 = ( (uint64_t) aLow ) * bLow;
475
    zMiddleA = ( (uint64_t) aLow ) * bHigh;
476
    zMiddleB = ( (uint64_t) aHigh ) * bLow;
477
    z0 = ( (uint64_t) aHigh ) * bHigh;
478
    zMiddleA += zMiddleB;
479
    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
480
    zMiddleA <<= 32;
481
    z1 += zMiddleA;
482
    z0 += ( z1 < zMiddleA );
483
    *z1Ptr = z1;
484
    *z0Ptr = z0;
485

    
486
}
487

    
488
/*----------------------------------------------------------------------------
489
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
490
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
491
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
492
| `z2Ptr'.
493
*----------------------------------------------------------------------------*/
494

    
495
INLINE void
496
 mul128By64To192(
497
     uint64_t a0,
498
     uint64_t a1,
499
     uint64_t b,
500
     uint64_t *z0Ptr,
501
     uint64_t *z1Ptr,
502
     uint64_t *z2Ptr
503
 )
504
{
505
    uint64_t z0, z1, z2, more1;
506

    
507
    mul64To128( a1, b, &z1, &z2 );
508
    mul64To128( a0, b, &z0, &more1 );
509
    add128( z0, more1, 0, z1, &z0, &z1 );
510
    *z2Ptr = z2;
511
    *z1Ptr = z1;
512
    *z0Ptr = z0;
513

    
514
}
515

    
516
/*----------------------------------------------------------------------------
517
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
518
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519
| product.  The product is broken into four 64-bit pieces which are stored at
520
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521
*----------------------------------------------------------------------------*/
522

    
523
INLINE void
524
 mul128To256(
525
     uint64_t a0,
526
     uint64_t a1,
527
     uint64_t b0,
528
     uint64_t b1,
529
     uint64_t *z0Ptr,
530
     uint64_t *z1Ptr,
531
     uint64_t *z2Ptr,
532
     uint64_t *z3Ptr
533
 )
534
{
535
    uint64_t z0, z1, z2, z3;
536
    uint64_t more1, more2;
537

    
538
    mul64To128( a1, b1, &z2, &z3 );
539
    mul64To128( a1, b0, &z1, &more2 );
540
    add128( z1, more2, 0, z2, &z1, &z2 );
541
    mul64To128( a0, b0, &z0, &more1 );
542
    add128( z0, more1, 0, z1, &z0, &z1 );
543
    mul64To128( a0, b1, &more1, &more2 );
544
    add128( more1, more2, 0, z2, &more1, &z2 );
545
    add128( z0, z1, 0, more1, &z0, &z1 );
546
    *z3Ptr = z3;
547
    *z2Ptr = z2;
548
    *z1Ptr = z1;
549
    *z0Ptr = z0;
550

    
551
}

/*----------------------------------------------------------------------------
| Estimates the 64-bit integer quotient of the 128-bit value `a0'.`a1' (high
| word first) divided by `b'.  The divisor `b' must be at least 2^63.  With q
| the exact quotient truncated toward zero, the value returned lies between
| q and q + 2 inclusive.  When `b' <= `a0' (the quotient does not fit in 64
| bits) the largest 64-bit unsigned integer is returned.
*----------------------------------------------------------------------------*/

static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
    const uint64_t bHigh = b>>32;
    const uint64_t bHighShifted = bHigh<<32;
    const uint64_t bLowShifted = b<<32;
    uint64_t rem0, rem1, term0, term1;
    uint64_t quot;

    if ( b <= a0 ) {
        return LIT64( 0xFFFFFFFFFFFFFFFF );
    }

    /* First guess at the upper 32 quotient bits, from the top half of b. */
    if ( bHighShifted <= a0 ) {
        quot = LIT64( 0xFFFFFFFF00000000 );
    }
    else {
        quot = ( a0 / bHigh )<<32;
    }
    mul64To128( b, quot, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );

    /* While the remainder is negative the guess was too large: step the
       upper quotient half down by one and add b<<32 back. */
    while ( ( (int64_t) rem0 ) < 0 ) {
        quot -= LIT64( 0x100000000 );
        add128( rem0, rem1, bHigh, bLowShifted, &rem0, &rem1 );
    }

    /* Lower 32 quotient bits, estimated from the remainder. */
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    if ( bHighShifted <= rem0 ) {
        quot |= 0xFFFFFFFF;
    }
    else {
        quot |= rem0 / bHigh;
    }
    return quot;
}
583

    
584
/*----------------------------------------------------------------------------
585
| Returns an approximation to the square root of the 32-bit significand given
586
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
587
| `aExp' (the least significant bit) is 1, the integer returned approximates
588
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
589
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
590
| case, the approximation returned lies strictly within +/-2 of the exact
591
| value.
592
*----------------------------------------------------------------------------*/
593

    
594
static uint32_t estimateSqrt32( int16 aExp, uint32_t a )
595
{
596
    static const uint16_t sqrtOddAdjustments[] = {
597
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
598
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
599
    };
600
    static const uint16_t sqrtEvenAdjustments[] = {
601
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
602
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
603
    };
604
    int8 index;
605
    uint32_t z;
606

    
607
    index = ( a>>27 ) & 15;
608
    if ( aExp & 1 ) {
609
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
610
        z = ( ( a / z )<<14 ) + ( z<<15 );
611
        a >>= 1;
612
    }
613
    else {
614
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
615
        z = a / z + z;
616
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
617
        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
618
    }
619
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
620

    
621
}
622

    
623
/*----------------------------------------------------------------------------
624
| Returns the number of leading 0 bits before the most-significant 1 bit of
625
| `a'.  If `a' is zero, 32 is returned.
626
*----------------------------------------------------------------------------*/
627

    
628
static int8 countLeadingZeros32( uint32_t a )
629
{
630
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
631
    if (a) {
632
        return __builtin_clz(a);
633
    } else {
634
        return 32;
635
    }
636
#else
637
    static const int8 countLeadingZerosHigh[] = {
638
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
639
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
640
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
641
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
642
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
645
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
646
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
654
    };
655
    int8 shiftCount;
656

    
657
    shiftCount = 0;
658
    if ( a < 0x10000 ) {
659
        shiftCount += 16;
660
        a <<= 16;
661
    }
662
    if ( a < 0x1000000 ) {
663
        shiftCount += 8;
664
        a <<= 8;
665
    }
666
    shiftCount += countLeadingZerosHigh[ a>>24 ];
667
    return shiftCount;
668
#endif
669
}
670

    
671
/*----------------------------------------------------------------------------
672
| Returns the number of leading 0 bits before the most-significant 1 bit of
673
| `a'.  If `a' is zero, 64 is returned.
674
*----------------------------------------------------------------------------*/
675

    
676
static int8 countLeadingZeros64( uint64_t a )
677
{
678
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
679
    if (a) {
680
        return __builtin_clzll(a);
681
    } else {
682
        return 64;
683
    }
684
#else
685
    int8 shiftCount;
686

    
687
    shiftCount = 0;
688
    if ( a < ( (uint64_t) 1 )<<32 ) {
689
        shiftCount += 32;
690
    }
691
    else {
692
        a >>= 32;
693
    }
694
    shiftCount += countLeadingZeros32( a );
695
    return shiftCount;
696
#endif
697
}
698

    
699
/*----------------------------------------------------------------------------
700
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
701
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
702
| Otherwise, returns 0.
703
*----------------------------------------------------------------------------*/
704

    
705
INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
706
{
707

    
708
    return ( a0 == b0 ) && ( a1 == b1 );
709

    
710
}
711

    
712
/*----------------------------------------------------------------------------
713
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
714
| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
715
| Otherwise, returns 0.
716
*----------------------------------------------------------------------------*/
717

    
718
INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
719
{
720

    
721
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
722

    
723
}
724

    
725
/*----------------------------------------------------------------------------
726
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
727
| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
728
| returns 0.
729
*----------------------------------------------------------------------------*/
730

    
731
INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
732
{
733

    
734
    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
735

    
736
}
737

    
738
/*----------------------------------------------------------------------------
739
| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
740
| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
741
| Otherwise, returns 0.
742
*----------------------------------------------------------------------------*/
743

    
744
INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
745
{
746

    
747
    return ( a0 != b0 ) || ( a1 != b1 );
748

    
749
}