Revision 02acedf9

b/target-arm/helpers.h
461 461

  
462 462
DEF_HELPER_2(set_teecr, void, env, i32)
463 463

  
464
/* Neon unzip helpers: each is declared with a void result and
 * (env, i32, i32) arguments.  The "q" variants have matching definitions
 * that operate on register pairs (rd, rd + 1) and (rm, rm + 1).
 */
DEF_HELPER_3(neon_unzip8, void, env, i32, i32)
465
DEF_HELPER_3(neon_unzip16, void, env, i32, i32)
466
DEF_HELPER_3(neon_qunzip8, void, env, i32, i32)
467
DEF_HELPER_3(neon_qunzip16, void, env, i32, i32)
468
DEF_HELPER_3(neon_qunzip32, void, env, i32, i32)
469

  
464 470
#include "def-helper.h"
b/target-arm/neon_helper.c
1693 1693
    float32 f1 = float32_abs(vfp_itos(b));
1694 1694
    return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0;
1695 1695
}
1696

  
1697
/* Extract element N (counting from the least significant end) of width
 * SIZE bits from V: shift V right by N * SIZE and mask to the low SIZE bits.
 * The mask is built from 1ull << SIZE, so SIZE must be smaller than 64.
 */
#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
1698

  
1699
/* Unzip 8-bit elements across the register pairs (rd, rd + 1) and
 * (rm, rm + 1) of env->vfp.regs.
 *
 * The even-numbered bytes (0, 2, 4, 6) of rd, rd + 1, rm and rm + 1 are
 * packed, in that order, into rd and rd + 1; the odd-numbered bytes
 * (1, 3, 5, 7) of the same four registers are packed into rm and rm + 1.
 * All four source values are read before any result is written back.
 */
void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
1700
{
1701
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
1702
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
1703
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
1704
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
1705
    uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8)
1706
        | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24)
1707
        | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40)
1708
        | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56);
1709
    uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8)
1710
        | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24)
1711
        | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40)
1712
        | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56);
1713
    uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8)
1714
        | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24)
1715
        | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40)
1716
        | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56);
1717
    uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8)
1718
        | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24)
1719
        | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40)
1720
        | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56);
1721
    env->vfp.regs[rm] = make_float64(m0);
1722
    env->vfp.regs[rm + 1] = make_float64(m1);
1723
    env->vfp.regs[rd] = make_float64(d0);
1724
    env->vfp.regs[rd + 1] = make_float64(d1);
1725
}
1726

  
1727
/* Unzip 16-bit elements across the register pairs (rd, rd + 1) and
 * (rm, rm + 1) of env->vfp.regs.
 *
 * The even-numbered halfwords (0, 2) of rd, rd + 1, rm and rm + 1 are
 * packed, in that order, into rd and rd + 1; the odd-numbered halfwords
 * (1, 3) of the same four registers are packed into rm and rm + 1.
 * All four source values are read before any result is written back.
 */
void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
1728
{
1729
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
1730
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
1731
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
1732
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
1733
    uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16)
1734
        | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48);
1735
    uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16)
1736
        | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48);
1737
    uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16)
1738
        | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48);
1739
    uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16)
1740
        | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48);
1741
    env->vfp.regs[rm] = make_float64(m0);
1742
    env->vfp.regs[rm + 1] = make_float64(m1);
1743
    env->vfp.regs[rd] = make_float64(d0);
1744
    env->vfp.regs[rd + 1] = make_float64(d1);
1745
}
1746

  
1747
/* Unzip 32-bit elements across the register pairs (rd, rd + 1) and
 * (rm, rm + 1) of env->vfp.regs.
 *
 * The low words (element 0) of rd, rd + 1, rm and rm + 1 are packed, in
 * that order, into rd and rd + 1; the high words (element 1) of the same
 * four registers are packed into rm and rm + 1.
 * All four source values are read before any result is written back.
 */
void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
1748
{
1749
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
1750
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
1751
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
1752
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
1753
    uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32);
1754
    uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32);
1755
    uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32);
1756
    uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32);
1757
    env->vfp.regs[rm] = make_float64(m0);
1758
    env->vfp.regs[rm + 1] = make_float64(m1);
1759
    env->vfp.regs[rd] = make_float64(d0);
1760
    env->vfp.regs[rd + 1] = make_float64(d1);
1761
}
1762

  
1763
/* Unzip 8-bit elements across the single registers rd and rm of
 * env->vfp.regs.
 *
 * The even-numbered bytes (0, 2, 4, 6) of rd followed by those of rm are
 * packed into rd; the odd-numbered bytes (1, 3, 5, 7) of rd followed by
 * those of rm are packed into rm.
 * Both source values are read before either result is written back.
 */
void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
1764
{
1765
    uint64_t zm = float64_val(env->vfp.regs[rm]);
1766
    uint64_t zd = float64_val(env->vfp.regs[rd]);
1767
    uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8)
1768
        | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24)
1769
        | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40)
1770
        | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56);
1771
    uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8)
1772
        | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24)
1773
        | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40)
1774
        | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56);
1775
    env->vfp.regs[rm] = make_float64(m0);
1776
    env->vfp.regs[rd] = make_float64(d0);
1777
}
1778

  
1779
/* Unzip 16-bit elements across the single registers rd and rm of
 * env->vfp.regs.
 *
 * The even-numbered halfwords (0, 2) of rd followed by those of rm are
 * packed into rd; the odd-numbered halfwords (1, 3) of rd followed by
 * those of rm are packed into rm.
 * Both source values are read before either result is written back.
 */
void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
1780
{
1781
    uint64_t zm = float64_val(env->vfp.regs[rm]);
1782
    uint64_t zd = float64_val(env->vfp.regs[rd]);
1783
    uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16)
1784
        | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48);
1785
    uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16)
1786
        | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48);
1787
    env->vfp.regs[rm] = make_float64(m0);
1788
    env->vfp.regs[rd] = make_float64(d0);
1789
}
b/target-arm/translate.c
3614 3614
    return tmp;
3615 3615
}
3616 3616

  
3617
static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
3617
static int gen_neon_unzip(int rd, int rm, int size, int q)
3618 3618
{
3619
    TCGv rd, rm, tmp;
3620

  
3621
    rd = new_tmp();
3622
    rm = new_tmp();
3623
    tmp = new_tmp();
3624

  
3625
    tcg_gen_andi_i32(rd, t0, 0xff);
3626
    tcg_gen_shri_i32(tmp, t0, 8);
3627
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3628
    tcg_gen_or_i32(rd, rd, tmp);
3629
    tcg_gen_shli_i32(tmp, t1, 16);
3630
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3631
    tcg_gen_or_i32(rd, rd, tmp);
3632
    tcg_gen_shli_i32(tmp, t1, 8);
3633
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
3634
    tcg_gen_or_i32(rd, rd, tmp);
3635

  
3636
    tcg_gen_shri_i32(rm, t0, 8);
3637
    tcg_gen_andi_i32(rm, rm, 0xff);
3638
    tcg_gen_shri_i32(tmp, t0, 16);
3639
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3640
    tcg_gen_or_i32(rm, rm, tmp);
3641
    tcg_gen_shli_i32(tmp, t1, 8);
3642
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3643
    tcg_gen_or_i32(rm, rm, tmp);
3644
    tcg_gen_andi_i32(tmp, t1, 0xff000000);
3645
    tcg_gen_or_i32(t1, rm, tmp);
3646
    tcg_gen_mov_i32(t0, rd);
3647

  
3648
    dead_tmp(tmp);
3649
    dead_tmp(rm);
3650
    dead_tmp(rd);
3619
    TCGv tmp, tmp2;
3620
    if (size == 3 || (!q && size == 2)) {
3621
        return 1;
3622
    }
3623
    tmp = tcg_const_i32(rd);
3624
    tmp2 = tcg_const_i32(rm);
3625
    if (q) {
3626
        switch (size) {
3627
        case 0:
3628
            gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
3629
            break;
3630
        case 1:
3631
            gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
3632
            break;
3633
        case 2:
3634
            gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
3635
            break;
3636
        default:
3637
            abort();
3638
        }
3639
    } else {
3640
        switch (size) {
3641
        case 0:
3642
            gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
3643
            break;
3644
        case 1:
3645
            gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
3646
            break;
3647
        default:
3648
            abort();
3649
        }
3650
    }
3651
    tcg_temp_free_i32(tmp);
3652
    tcg_temp_free_i32(tmp2);
3653
    return 0;
3651 3654
}
3652 3655

  
3653 3656
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
......
3705 3708
    dead_tmp(tmp);
3706 3709
}
3707 3710

  
3708
/* Inline unzip step for one register (this is the side of the diff that
 * the revision removes).
 *
 * Loads a pair of values from `reg` with neon_load_reg(), shuffles the
 * pair according to `size` (0: gen_neon_unzip_u8, 1: gen_neon_zip_u16,
 * 2: left unchanged, anything else aborts) and stores the two results in
 * scratch slots tmp + n and tmp + n + 1.
 *
 * NOTE(review): the loop advances n by 2 while n < q + 1, so for q == 0
 * and q == 1 alike only the pair (0, 1) is processed -- confirm whether
 * the second pair of a quad register was meant to be handled here.
 */
static void gen_neon_unzip(int reg, int q, int tmp, int size)
3709
{
3710
    int n;
3711
    TCGv t0, t1;
3712

  
3713
    for (n = 0; n < q + 1; n += 2) {
3714
        t0 = neon_load_reg(reg, n);
3715
        t1 = neon_load_reg(reg, n + 1);
3716
        switch (size) {
3717
        case 0: gen_neon_unzip_u8(t0, t1); break;
3718
        case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same.  */
3719
        case 2: /* no-op */; break;
3720
        default: abort();
3721
        }
3722
        neon_store_scratch(tmp + n, t0);
3723
        neon_store_scratch(tmp + n + 1, t1);
3724
    }
3725
}
3726

  
3727 3711
static void gen_neon_trn_u8(TCGv t0, TCGv t1)
3728 3712
{
3729 3713
    TCGv rd, tmp;
......
5440 5424
                    }
5441 5425
                    break;
5442 5426
                case 34: /* VUZP */
5443
                    /* Reg  Before       After
5444
                       Rd   A3 A2 A1 A0  B2 B0 A2 A0
5445
                       Rm   B3 B2 B1 B0  B3 B1 A3 A1
5446
                     */
5447
                    if (size == 3)
5427
                    if (gen_neon_unzip(rd, rm, size, q)) {
5448 5428
                        return 1;
5449
                    gen_neon_unzip(rd, q, 0, size);
5450
                    gen_neon_unzip(rm, q, 4, size);
5451
                    if (q) {
5452
                        static int unzip_order_q[8] =
5453
                            {0, 2, 4, 6, 1, 3, 5, 7};
5454
                        for (n = 0; n < 8; n++) {
5455
                            int reg = (n < 4) ? rd : rm;
5456
                            tmp = neon_load_scratch(unzip_order_q[n]);
5457
                            neon_store_reg(reg, n % 4, tmp);
5458
                        }
5459
                    } else {
5460
                        static int unzip_order[4] =
5461
                            {0, 4, 1, 5};
5462
                        for (n = 0; n < 4; n++) {
5463
                            int reg = (n < 2) ? rd : rm;
5464
                            tmp = neon_load_scratch(unzip_order[n]);
5465
                            neon_store_reg(reg, n % 2, tmp);
5466
                        }
5467 5429
                    }
5468 5430
                    break;
5469 5431
                case 35: /* VZIP */

Also available in: Unified diff