Revision 222a3336 target-i386/ops_sse.h

b/target-i386/ops_sse.h
1 1
/*
2
 *  MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support
2
 *  MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
3 3
 *
4 4
 *  Copyright (c) 2005 Fabrice Bellard
5
 *  Copyright (c) 2008 Intel Corporation  <andrew.zaborowski@intel.com>
5 6
 *
6 7
 * This library is free software; you can redistribute it and/or
7 8
 * modify it under the terms of the GNU Lesser General Public
......
1420 1421
    *d = r;
1421 1422
}
1422 1423

  
1424
/* Shorthand for register 0 of the xmm_regs array reached through `env`. */
#define XMM0 env->xmm_regs[0]
1425

  
1426
#if SHIFT == 1
1427
/*
 * SSE_HELPER_V(name, elem, num, F)
 *
 * Defines glue(name, SUFFIX)(Reg *d, Reg *s): a lane-wise helper in which
 * lane i of d becomes F(d lane i, s lane i, XMM0 lane i).  `elem` is the
 * lane accessor and `num` the lane count; lanes 0-1 are always processed,
 * lanes 2-3 when num > 2, lanes 4-7 when num > 4, lanes 8-15 when num > 8.
 */
#define SSE_HELPER_V(name, elem, num, F)\
void glue(name, SUFFIX) (Reg *d, Reg *s)\
{\
    d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\
    d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\
    if (num > 2) {\
        d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\
        d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\
    }\
    if (num > 4) {\
        d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\
        d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\
        d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\
        d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\
    }\
    if (num > 8) {\
        d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\
        d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\
        d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\
        d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\
        d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\
        d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\
        d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\
        d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\
    }\
}
1453

  
1454
/*
 * SSE_HELPER_I(name, elem, num, F)
 *
 * Defines glue(name, SUFFIX)(Reg *d, Reg *s, uint32_t imm): a lane-wise
 * helper in which lane i of d becomes F(d lane i, s lane i, bit i of imm).
 * `elem` is the lane accessor and `num` the lane count; lanes 0-1 are always
 * processed, lanes 2-3 when num > 2, lanes 4-7 when num > 4 and lanes 8-15
 * when num > 8.
 */
#define SSE_HELPER_I(name, elem, num, F)\
void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\
{\
    d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\
    d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\
    if (num > 2) {\
        d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\
        d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\
    }\
    if (num > 4) {\
        d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\
        d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\
        d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\
        d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\
    }\
    if (num > 8) {\
        d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\
        d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\
        d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\
        d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\
        d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\
        d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\
        d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\
        d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\
    }\
}
1480

  
1481
/* SSE4.1 op helpers */
1482
#define FBLENDVB(d, s, m) (m & 0x80) ? s : d
1483
#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d
1484
#define FBLENDVPD(d, s, m) (m & 0x8000000000000000) ? s : d
1485
SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)
1486
SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)
1487
SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
1488

  
1489
/*
 * PTEST: sets CC_SRC from two 128-bit bitwise tests and writes nothing else.
 *   CC_Z is set when (s AND d) is all zero.
 *   CC_C is set when (s AND NOT d) is all zero.
 */
void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s)
{
    const uint64_t and_bits  = (s->Q(0) &  d->Q(0)) | (s->Q(1) &  d->Q(1));
    const uint64_t andn_bits = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
    int flags = 0;

    if (!and_bits) {
        flags |= CC_Z;
    }
    if (!andn_bits) {
        flags |= CC_C;
    }

    CC_SRC = flags;
}
1496

  
1497
/*
 * SSE_HELPER_F(name, elem, num, F)
 *
 * Defines glue(name, SUFFIX)(Reg *d, Reg *s): a widening move in which
 * destination lane i (accessor `elem`, `num` lanes: 2, 4 or 8) is set to
 * F(i), where F is an accessor expression on the narrower source lanes.
 *
 * Only lanes that exist for `num` are written: lanes 2-3 when num > 2 and
 * lanes 4-7 when num > 4.  Writing lanes the register does not have would
 * store past the end of *d.
 *
 * Lanes are written from the highest index down.  Destination lanes are
 * wider than the source lanes they are built from, so when d and s are the
 * same register a descending order never overwrites a source lane that has
 * not been read yet.
 */
#define SSE_HELPER_F(name, elem, num, F)\
void glue(name, SUFFIX) (Reg *d, Reg *s)\
{\
    if (num > 2) {\
        if (num > 4) {\
            d->elem(7) = F(7);\
            d->elem(6) = F(6);\
            d->elem(5) = F(5);\
            d->elem(4) = F(4);\
        }\
        d->elem(3) = F(3);\
        d->elem(2) = F(2);\
    }\
    d->elem(1) = F(1);\
    d->elem(0) = F(0);\
}

/* Sign-extending moves: the cast makes the source lane signed before widening. */
SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)
SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B)
SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W)
SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W)
SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L)
/* Zero-extending moves: the source lane is used as is. */
SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B)
SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B)
SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B)
SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W)
SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
1526

  
1527
/*
 * PMULDQ: multiply the signed 32-bit lanes 0 and 2 of d and s and store the
 * two full 64-bit signed products in the Q lanes 0 and 1 of d.
 */
void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s)
{
    const int64_t prod_lo = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0);
    const int64_t prod_hi = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2);

    d->Q(0) = prod_lo;
    d->Q(1) = prod_hi;
}
1532

  
1533
#define FCMPEQQ(d, s) d == s ? -1 : 0
1534
SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
1535

  
1536
/*
 * PACKUSDW: pack the four signed 32-bit lanes of d followed by the four
 * signed 32-bit lanes of s into eight 16-bit lanes of d, each passed through
 * satuw().
 *
 * The result is assembled in a temporary and copied out at the end: the
 * narrow writes overlap the wide lanes still to be read, so packing in place
 * would read already-overwritten data when d and s are the same register.
 */
void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
{
    Reg r;

    r.W(0) = satuw((int32_t) d->L(0));
    r.W(1) = satuw((int32_t) d->L(1));
    r.W(2) = satuw((int32_t) d->L(2));
    r.W(3) = satuw((int32_t) d->L(3));
    r.W(4) = satuw((int32_t) s->L(0));
    r.W(5) = satuw((int32_t) s->L(1));
    r.W(6) = satuw((int32_t) s->L(2));
    r.W(7) = satuw((int32_t) s->L(3));

    *d = r;
}
1547

  
1548
#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s)
1549
#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s)
1550
#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s)
1551
#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s)
1552
SSE_HELPER_B(helper_pminsb, FMINSB)
1553
SSE_HELPER_L(helper_pminsd, FMINSD)
1554
SSE_HELPER_W(helper_pminuw, MIN)
1555
SSE_HELPER_L(helper_pminud, MIN)
1556
SSE_HELPER_B(helper_pmaxsb, FMAXSB)
1557
SSE_HELPER_L(helper_pmaxsd, FMAXSD)
1558
SSE_HELPER_W(helper_pmaxuw, MAX)
1559
SSE_HELPER_L(helper_pmaxud, MAX)
1560

  
1561
#define FMULLD(d, s) (int32_t) d * (int32_t) s
1562
SSE_HELPER_L(helper_pmulld, FMULLD)
1563

  
1564
/*
 * PHMINPOSUW: find the smallest of the eight 16-bit lanes of s (the lowest
 * index wins on ties, because only a strictly smaller lane replaces the
 * current candidate).  d receives the minimum in W(0), its index in W(1),
 * and zero everywhere else.
 *
 * The minimum is latched into a local before d is touched, so the result is
 * correct when d and s are the same register.
 */
void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
{
    int idx = 0;
    int i;
    uint16_t min;

    for (i = 1; i < 8; i++) {
        if (s->W(i) < s->W(idx)) {
            idx = i;
        }
    }
    min = s->W(idx);

    d->Q(1) = 0;
    d->L(1) = 0;
    d->W(1) = idx;
    d->W(0) = min;
}
1588

  
1589
/*
 * ROUNDPS: round each of the four 32-bit floating-point lanes of s to an
 * integral value and store it in the matching lane of d.
 *
 * mode bit 2 clear: bits 1:0 select the rounding mode for this operation
 *                   (0 nearest-even, 1 down, 2 up, 3 toward zero).
 * mode bit 2 set:   the rounding mode already in env->sse_status is used.
 * mode bit 3:       inexact-suppression is not implemented (see TODO).
 *
 * The lanes are single precision, so float32_round_to_int is used.  The
 * rounding mode in env->sse_status is restored before returning.
 */
void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
{
    signed char prev_rounding_mode;

    prev_rounding_mode = env->sse_status.float_rounding_mode;
    if (!(mode & (1 << 2))) {
        switch (mode & 3) {
        case 0:
            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
            break;
        case 1:
            set_float_rounding_mode(float_round_down, &env->sse_status);
            break;
        case 2:
            set_float_rounding_mode(float_round_up, &env->sse_status);
            break;
        case 3:
            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
            break;
        }
    }

    d->L(0) = float32_round_to_int(s->L(0), &env->sse_status);
    d->L(1) = float32_round_to_int(s->L(1), &env->sse_status);
    d->L(2) = float32_round_to_int(s->L(2), &env->sse_status);
    d->L(3) = float32_round_to_int(s->L(3), &env->sse_status);

#if 0 /* TODO */
    if (mode & (1 << 3))
        set_float_exception_flags(
                        get_float_exception_flags(&env->sse_status) &
                        ~float_flag_inexact,
                        &env->sse_status);
#endif
    env->sse_status.float_rounding_mode = prev_rounding_mode;
}
1624

  
1625
/*
 * ROUNDPD: round both 64-bit floating-point lanes of s to integral values
 * and store them in the matching lanes of d.
 *
 * When bit 2 of mode is clear, bits 1:0 pick the rounding mode for this
 * operation (0 nearest-even, 1 down, 2 up, 3 toward zero); otherwise the
 * mode already in env->sse_status applies.  The previous rounding mode is
 * restored on exit.  Bit 3 (inexact suppression) is not implemented yet.
 */
void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
{
    const signed char saved_mode = env->sse_status.float_rounding_mode;
    const int use_current_mode = (mode & (1 << 2)) != 0;

    if (!use_current_mode) {
        switch (mode & 3) {
        case 0:
            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
            break;
        case 1:
            set_float_rounding_mode(float_round_down, &env->sse_status);
            break;
        case 2:
            set_float_rounding_mode(float_round_up, &env->sse_status);
            break;
        case 3:
            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
            break;
        }
    }

    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
    d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);

#if 0 /* TODO */
    if (mode & (1 << 3))
        set_float_exception_flags(
                        get_float_exception_flags(&env->sse_status) &
                        ~float_flag_inexact,
                        &env->sse_status);
#endif
    env->sse_status.float_rounding_mode = saved_mode;
}
1658

  
1659
/*
 * ROUNDSS: round the 32-bit floating-point lane 0 of s to an integral value
 * and store it in lane 0 of d; the other lanes of d are left untouched.
 *
 * mode bit 2 clear: bits 1:0 select the rounding mode for this operation
 *                   (0 nearest-even, 1 down, 2 up, 3 toward zero).
 * mode bit 2 set:   the rounding mode already in env->sse_status is used.
 * mode bit 3:       inexact-suppression is not implemented (see TODO).
 *
 * The lane is single precision, so float32_round_to_int is used.  The
 * rounding mode in env->sse_status is restored before returning.
 */
void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
{
    signed char prev_rounding_mode;

    prev_rounding_mode = env->sse_status.float_rounding_mode;
    if (!(mode & (1 << 2))) {
        switch (mode & 3) {
        case 0:
            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
            break;
        case 1:
            set_float_rounding_mode(float_round_down, &env->sse_status);
            break;
        case 2:
            set_float_rounding_mode(float_round_up, &env->sse_status);
            break;
        case 3:
            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
            break;
        }
    }

    d->L(0) = float32_round_to_int(s->L(0), &env->sse_status);

#if 0 /* TODO */
    if (mode & (1 << 3))
        set_float_exception_flags(
                        get_float_exception_flags(&env->sse_status) &
                        ~float_flag_inexact,
                        &env->sse_status);
#endif
    env->sse_status.float_rounding_mode = prev_rounding_mode;
}
1691

  
1692
/*
 * ROUNDSD: round the 64-bit floating-point lane 0 of s to an integral value
 * and store it in lane 0 of d; lane 1 of d is left untouched.
 *
 * When bit 2 of mode is clear, bits 1:0 pick the rounding mode for this
 * operation (0 nearest-even, 1 down, 2 up, 3 toward zero); otherwise the
 * mode already in env->sse_status applies.  The previous rounding mode is
 * restored on exit.  Bit 3 (inexact suppression) is not implemented yet.
 */
void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
{
    const signed char saved_mode = env->sse_status.float_rounding_mode;
    const int use_current_mode = (mode & (1 << 2)) != 0;

    if (!use_current_mode) {
        switch (mode & 3) {
        case 0:
            set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
            break;
        case 1:
            set_float_rounding_mode(float_round_down, &env->sse_status);
            break;
        case 2:
            set_float_rounding_mode(float_round_up, &env->sse_status);
            break;
        case 3:
            set_float_rounding_mode(float_round_to_zero, &env->sse_status);
            break;
        }
    }

    d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);

#if 0 /* TODO */
    if (mode & (1 << 3))
        set_float_exception_flags(
                        get_float_exception_flags(&env->sse_status) &
                        ~float_flag_inexact,
                        &env->sse_status);
#endif
    env->sse_status.float_rounding_mode = saved_mode;
}
1724

  
1725
#define FBLENDP(d, s, m) m ? s : d
1726
SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
1727
SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
1728
SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
1729

  
1730
/*
 * DPPS: conditional single-precision dot product.
 *
 * Bits 7:4 of mask choose which of the four lane products d[i] * s[i] are
 * accumulated (in ascending lane order, starting from zero).  Bits 3:0 choose
 * which lanes of d receive the accumulated sum; unselected lanes are zeroed.
 */
void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
{
    float32 sum = 0 /*float32_zero*/;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        if (mask & (1 << (4 + lane))) {
            sum = float32_add(sum,
                            float32_mul(d->L(lane), s->L(lane),
                                        &env->sse_status),
                            &env->sse_status);
        }
    }

    for (lane = 0; lane < 4; lane++) {
        d->L(lane) = (mask & (1 << lane)) ? sum : 0 /*float32_zero*/;
    }
}
1755

  
1756
/*
 * DPPD: conditional double-precision dot product.
 *
 * Bits 5:4 of mask choose which of the two lane products d[i] * s[i] are
 * accumulated (lane 0 first, starting from zero).  Bits 1:0 choose which
 * lanes of d receive the accumulated sum; unselected lanes are zeroed.
 */
void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
{
    float64 sum = 0 /*float64_zero*/;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        if (mask & (1 << (4 + lane))) {
            sum = float64_add(sum,
                            float64_mul(d->Q(lane), s->Q(lane),
                                        &env->sse_status),
                            &env->sse_status);
        }
    }

    for (lane = 0; lane < 2; lane++) {
        d->Q(lane) = (mask & (1 << lane)) ? sum : 0 /*float64_zero*/;
    }
}
1771

  
1772
/*
 * MPSADBW: eight sums of absolute byte differences.
 *
 * Bits 1:0 of offset select a 4-byte group in s (byte offset 0, 4, 8 or 12);
 * bit 2 selects the starting byte in d (0 or 4).  Result word i is the sum of
 * |d byte (start + i + k) - s byte (group + k)| for k = 0..3.  The result is
 * built in a temporary and then copied over d.
 */
void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
{
    const int src_base = (offset & 3) << 2;
    const int dst_base = (offset & 4) << 0;
    int i;
    Reg r;

    for (i = 0; i < 8; i++) {
        int sum = 0;
        int k;

        for (k = 0; k < 4; k++) {
            sum += abs1(d->B(dst_base + i + k) - s->B(src_base + k));
        }
        r.W(i) = sum;
    }

    *d = r;
}
1789

  
1790
/* SSE4.2 op helpers */
1791
/* it's unclear whether signed or unsigned */
1792
#define FCMPGTQ(d, s) d > s ? -1 : 0
1793
SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
1794

  
1795
static inline int pcmp_elen(int reg, uint32_t ctrl)
1796
{
1797
    int val;
1798

  
1799
    /* Presence of REX.W is indicated by a bit higher than 7 set */
1800
    if (ctrl >> 8)
1801
        val = abs1((int64_t) env->regs[reg]);
1802
    else
1803
        val = abs1((int32_t) env->regs[reg]);
1804

  
1805
    if (ctrl & 1) {
1806
        if (val > 8)
1807
            return 8;
1808
    } else
1809
        if (val > 16)
1810
            return 16;
1811

  
1812
    return val;
1813
}
1814

  
1815
/*
 * Implicit string length for PCMPISTRI/PCMPISTRM: the number of leading
 * non-zero elements of r, capped at the element count.  Elements are words
 * (at most 8) when bit 0 of ctrl is set, bytes (at most 16) otherwise.
 */
static inline int pcmp_ilen(Reg *r, uint8_t ctrl)
{
    int len;

    if (ctrl & 1) {
        for (len = 0; len < 8; len++) {
            if (!r->W(len)) {
                break;
            }
        }
    } else {
        for (len = 0; len < 16; len++) {
            if (!r->B(len)) {
                break;
            }
        }
    }

    return len;
}
1828

  
1829
/*
 * Fetch element i of r in the format selected by the low two bits of ctrl:
 * bit 0 picks word (set) or byte (clear) elements, bit 1 picks signed (set)
 * or unsigned (clear) interpretation.
 */
static inline int pcmp_val(Reg *r, uint8_t ctrl, int i)
{
    const int is_word = ctrl & 1;
    const int is_signed = ctrl & 2;

    if (is_word) {
        if (is_signed) {
            return (int16_t) r->W(i);
        }
        return r->W(i);
    }

    if (is_signed) {
        return (int8_t) r->B(i);
    }
    return r->B(i);
}
1843

  
1844
/*
 * Common core of PCMPESTRI/PCMPESTRM/PCMPISTRI/PCMPISTRM.
 *
 * d, s    operands; valids/validd are the number of valid elements in s and
 *         d respectively (converted to a last-valid index below, so -1 means
 *         "empty").
 * ctrl    bit 0: word (set) or byte elements; bit 1: signed compare;
 *         bits 3:2: aggregation (0 equal-any, 1 ranges, 2 equal-each,
 *         3 equal-ordered); bits 5:4: polarity.
 *
 * Returns the result mask, bit j corresponding to element j of s.
 * CC_SRC is set as a side effect: CC_Z when s is shorter than the register,
 * CC_S when d is shorter than the register, CC_C when the mask is non-zero
 * and CC_O when bit 0 of the mask is set.
 *
 * NOTE(review): the iteration bounds of the equal-ordered case are kept as
 * they were; confirm against the architecture manual how elements of s past
 * its valid length must be treated.
 */
static inline unsigned pcmpxstrx(Reg *d, Reg *s,
                int8_t ctrl, int valids, int validd)
{
    unsigned int res = 0;
    int v;
    int j, i;
    int upper = (ctrl & 1) ? 7 : 15;

    valids--;
    validd--;

    CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0);

    switch ((ctrl >> 2) & 3) {
    case 0:
        /* Equal any: s[j] matches when it equals any valid element of d. */
        for (j = valids; j >= 0; j--) {
            res <<= 1;
            v = pcmp_val(s, ctrl, j);
            for (i = validd; i >= 0; i--)
                res |= (v == pcmp_val(d, ctrl, i));
        }
        break;
    case 1:
        /*
         * Ranges: d holds (lower, upper) pairs at (even, odd) indices;
         * s[j] matches when lower <= s[j] <= upper for some pair.
         */
        for (j = valids; j >= 0; j--) {
            res <<= 1;
            v = pcmp_val(s, ctrl, j);
            for (i = ((validd - 1) | 1); i >= 0; i -= 2)
                res |= (pcmp_val(d, ctrl, i - 0) >= v &&
                        pcmp_val(d, ctrl, i - 1) <= v);
        }
        break;
    case 2:
        /*
         * Equal each: positions past both strings count as equal (one bit
         * per such position), positions past exactly one string count as
         * different, and the remaining positions are compared.
         */
        res = (1 << (upper - MAX(valids, validd))) - 1;
        res <<= MAX(valids, validd) - MIN(valids, validd);
        for (i = MIN(valids, validd); i >= 0; i--) {
            res <<= 1;
            v = pcmp_val(s, ctrl, i);
            res |= (v == pcmp_val(d, ctrl, i));
        }
        break;
    case 3:
        /*
         * Equal ordered: bit j is set when d matches s starting at s[j].
         * The per-position match is accumulated in v so that earlier result
         * bits in res are preserved.
         */
        for (j = valids - validd; j >= 0; j--) {
            res <<= 1;
            v = 1;
            for (i = MIN(upper - j, validd); i >= 0; i--)
                v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
            res |= v;
        }
        break;
    }

    switch ((ctrl >> 4) & 3) {
    case 1:
        /* Negate every bit of the mask. */
        res ^= (2 << upper) - 1;
        break;
    case 3:
        /* Negate only the bits for valid elements of s (none if empty). */
        res ^= (1 << (valids + 1)) - 1;
        break;
    }

    if (res)
       CC_SRC |= CC_C;
    if (res & 1)
       CC_SRC |= CC_O;

    return res;
}
1910

  
1911
/*
 * Return the 1-based position of the most significant set bit of val,
 * found by a binary search over halving shift distances.  Returns 1 when
 * val is 0.
 */
static inline int rffs1(unsigned int val)
{
    int pos = 1;
    int step = sizeof(val) * 4;

    while (step) {
        if (val >> step) {
            val >>= step;
            pos += step;
        }
        step /= 2;
    }

    return pos;
}
1923

  
1924
/*
 * Return the 1-based position of the least significant set bit of val
 * (1 for bit 0, 32 for bit 31 of a 32-bit value).
 *
 * Binary search: whenever the low `hi` bits are all zero the lowest set bit
 * lies above them, so they are shifted out and counted.  val must be
 * non-zero; the callers only use this on a non-zero mask.
 */
static inline int ffs1(unsigned int val)
{
    int ret = 1, hi;

    for (hi = sizeof(val) * 4; hi; hi /= 2)
        if (!(val & ((1u << hi) - 1))) {
            val >>= hi;
            ret += hi;
        }

    return ret;
}
1936

  
1937
/*
 * PCMPESTRI: explicit-length string compare returning an index.
 *
 * The lengths of s and d are taken from EDX and EAX through pcmp_elen().
 * ECX receives rffs1(mask) - 1 when bit 6 of ctrl is set and ffs1(mask) - 1
 * otherwise; when the mask is zero ECX receives the element count
 * (16, or 8 when bit 0 of ctrl selects word elements).
 */
void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
{
    const int len_s = pcmp_elen(R_EDX, ctrl);
    const int len_d = pcmp_elen(R_EAX, ctrl);
    const unsigned int mask = pcmpxstrx(d, s, ctrl, len_s, len_d);

    if (!mask) {
        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
        return;
    }

    if (ctrl & (1 << 6)) {
        env->regs[R_ECX] = rffs1(mask) - 1;
    } else {
        env->regs[R_ECX] = ffs1(mask) - 1;
    }
}
1948

  
1949
/*
 * PCMPESTRM: explicit-length string compare returning a mask.
 *
 * The lengths of s and d are taken from EDX and EAX through pcmp_elen().
 * With bit 6 of ctrl set, the result mask is expanded so that element i of
 * the output is all-ones when bit i of the mask is set and zero otherwise
 * (8 words when ctrl bit 0 is set, 16 bytes otherwise).  With bit 6 clear,
 * the mask is stored zero-extended in the low quadword.
 *
 * NOTE(review): the output is written to d here; the architecture manual
 * describes the mask result as going to XMM0 - confirm how the caller sets
 * up d.
 */
void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
{
    int i;
    unsigned int res = pcmpxstrx(d, s, ctrl,
                    pcmp_elen(R_EDX, ctrl),
                    pcmp_elen(R_EAX, ctrl));

    if ((ctrl >> 6) & 1) {
        /* Walk the elements upward, consuming one mask bit per element. */
        if (ctrl & 1) {
            for (i = 0; i < 8; i++, res >>= 1)
                d->W(i) = (res & 1) ? ~0 : 0;
        } else {
            for (i = 0; i < 16; i++, res >>= 1)
                d->B(i) = (res & 1) ? ~0 : 0;
        }
    } else {
        d->Q(1) = 0;
        d->Q(0) = res;
    }
}
1968

  
1969
/*
 * PCMPISTRI: implicit-length string compare returning an index.
 *
 * The lengths of s and d are found with pcmp_ilen() (leading non-zero
 * elements).  ECX receives rffs1(mask) - 1 when bit 6 of ctrl is set and
 * ffs1(mask) - 1 otherwise; when the mask is zero ECX receives the element
 * count (16, or 8 when bit 0 of ctrl selects word elements).
 */
void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
{
    const int len_s = pcmp_ilen(s, ctrl);
    const int len_d = pcmp_ilen(d, ctrl);
    const unsigned int mask = pcmpxstrx(d, s, ctrl, len_s, len_d);

    if (!mask) {
        env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
        return;
    }

    if (ctrl & (1 << 6)) {
        env->regs[R_ECX] = rffs1(mask) - 1;
    } else {
        env->regs[R_ECX] = ffs1(mask) - 1;
    }
}
1980

  
1981
/*
 * PCMPISTRM: implicit-length string compare returning a mask.
 *
 * The lengths of s and d are found with pcmp_ilen() (leading non-zero
 * elements).  With bit 6 of ctrl set, the result mask is expanded so that
 * element i of the output is all-ones when bit i of the mask is set and zero
 * otherwise (8 words when ctrl bit 0 is set, 16 bytes otherwise).  With
 * bit 6 clear, the mask is stored zero-extended in the low quadword.
 *
 * NOTE(review): the output is written to d here; the architecture manual
 * describes the mask result as going to XMM0 - confirm how the caller sets
 * up d.
 */
void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
{
    int i;
    unsigned int res = pcmpxstrx(d, s, ctrl,
                    pcmp_ilen(s, ctrl),
                    pcmp_ilen(d, ctrl));

    if ((ctrl >> 6) & 1) {
        /* Walk the elements upward, consuming one mask bit per element. */
        if (ctrl & 1) {
            for (i = 0; i < 8; i++, res >>= 1)
                d->W(i) = (res & 1) ? ~0 : 0;
        } else {
            for (i = 0; i < 16; i++, res >>= 1)
                d->B(i) = (res & 1) ? ~0 : 0;
        }
    } else {
        d->Q(1) = 0;
        d->Q(0) = res;
    }
}
2000

  
2001
#define CRCPOLY        0x1edc6f41
2002
#define CRCPOLY_BITREV 0x82f63b78
2003
target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
2004
{
2005
    target_ulong crc = (msg & ((target_ulong) -1 >>
2006
                            (TARGET_LONG_BITS - len))) ^ crc1;
2007

  
2008
    while (len--)
2009
        crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0);
2010

  
2011
    return crc;
2012
}
2013

  
2014
#define POPMASK(i)     ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
2015
#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))
2016
target_ulong helper_popcnt(target_ulong n, uint32_t type)
2017
{
2018
    CC_SRC = n ? 0 : CC_Z;
2019

  
2020
    n = POPCOUNT(n, 0);
2021
    n = POPCOUNT(n, 1);
2022
    n = POPCOUNT(n, 2);
2023
    n = POPCOUNT(n, 3);
2024
    if (type == 1)
2025
        return n & 0xff;
2026

  
2027
    n = POPCOUNT(n, 4);
2028
#ifndef TARGET_X86_64
2029
    return n;
2030
#else
2031
    if (type == 2)
2032
        return n & 0xff;
2033

  
2034
    return POPCOUNT(n, 5);
2035
#endif
2036
}
2037
#endif
2038

  
1423 2039
#undef SHIFT
1424 2040
#undef XMM_ONLY
1425 2041
#undef Reg

Also available in: Unified diff