Revision d68a6f3a

b/target-arm/helpers.h
466 466
DEF_HELPER_3(neon_qunzip8, void, env, i32, i32)
467 467
DEF_HELPER_3(neon_qunzip16, void, env, i32, i32)
468 468
DEF_HELPER_3(neon_qunzip32, void, env, i32, i32)
469
/* Neon zip helpers: each is declared with a void result and the argument
 * list (env, i32, i32).
 */
DEF_HELPER_3(neon_zip8, void, env, i32, i32)
470
DEF_HELPER_3(neon_zip16, void, env, i32, i32)
471
DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
472
DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
473
DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
469 474

  
470 475
#include "def-helper.h"
b/target-arm/neon_helper.c
1787 1787
    env->vfp.regs[rm] = make_float64(m0);
1788 1788
    env->vfp.regs[rd] = make_float64(d0);
1789 1789
}
1790

  
1791
/*
 * Zip helper for 8-bit elements on register pairs.
 *
 * Inputs are the 64-bit registers rd, rd + 1, rm and rm + 1.  Element i of
 * the rd-side input and element i of the rm-side input are packed as
 * adjacent bytes (rd-side in the lower byte).  Pairs built from regs[rd] and
 * regs[rm] are written to rd (elements 0-3) and rd + 1 (elements 4-7); pairs
 * built from regs[rd + 1] and regs[rm + 1] are written to rm and rm + 1.
 */
void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every input before any register is overwritten. */
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    uint64_t new_d_lo = 0;
    uint64_t new_d_hi = 0;
    uint64_t new_m_lo = 0;
    uint64_t new_m_hi = 0;
    int pair;

    /* Each 64-bit result holds four 16-bit (d, m) byte pairs. */
    for (pair = 0; pair < 4; pair++) {
        const int lo_idx = pair;
        const int hi_idx = pair + 4;
        const int shift = pair * 16;

        new_d_lo |= (ELEM(d_lo, lo_idx, 8) | (ELEM(m_lo, lo_idx, 8) << 8))
            << shift;
        new_d_hi |= (ELEM(d_lo, hi_idx, 8) | (ELEM(m_lo, hi_idx, 8) << 8))
            << shift;
        new_m_lo |= (ELEM(d_hi, lo_idx, 8) | (ELEM(m_hi, lo_idx, 8) << 8))
            << shift;
        new_m_hi |= (ELEM(d_hi, hi_idx, 8) | (ELEM(m_hi, hi_idx, 8) << 8))
            << shift;
    }

    /* Store order is rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m_lo);
    env->vfp.regs[rm + 1] = make_float64(new_m_hi);
    env->vfp.regs[rd] = make_float64(new_d_lo);
    env->vfp.regs[rd + 1] = make_float64(new_d_hi);
}
1818

  
1819
/*
 * Zip helper for 16-bit elements on register pairs.
 *
 * Inputs are the 64-bit registers rd, rd + 1, rm and rm + 1.  Element i of
 * the rd-side input and element i of the rm-side input are packed as
 * adjacent halfwords (rd-side in the lower one).  Pairs built from regs[rd]
 * and regs[rm] are written to rd (elements 0-1) and rd + 1 (elements 2-3);
 * pairs built from regs[rd + 1] and regs[rm + 1] are written to rm and
 * rm + 1.
 */
void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every input before any register is overwritten. */
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    uint64_t new_d_lo = 0;
    uint64_t new_d_hi = 0;
    uint64_t new_m_lo = 0;
    uint64_t new_m_hi = 0;
    int pair;

    /* Each 64-bit result holds two 32-bit (d, m) halfword pairs. */
    for (pair = 0; pair < 2; pair++) {
        const int lo_idx = pair;
        const int hi_idx = pair + 2;
        const int shift = pair * 32;

        new_d_lo |= (ELEM(d_lo, lo_idx, 16) | (ELEM(m_lo, lo_idx, 16) << 16))
            << shift;
        new_d_hi |= (ELEM(d_lo, hi_idx, 16) | (ELEM(m_lo, hi_idx, 16) << 16))
            << shift;
        new_m_lo |= (ELEM(d_hi, lo_idx, 16) | (ELEM(m_hi, lo_idx, 16) << 16))
            << shift;
        new_m_hi |= (ELEM(d_hi, hi_idx, 16) | (ELEM(m_hi, hi_idx, 16) << 16))
            << shift;
    }

    /* Store order is rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m_lo);
    env->vfp.regs[rm + 1] = make_float64(new_m_hi);
    env->vfp.regs[rd] = make_float64(new_d_lo);
    env->vfp.regs[rd + 1] = make_float64(new_d_hi);
}
1838

  
1839
/*
 * Zip helper for 32-bit elements on register pairs.
 *
 * Each output register is one (d, m) word pair with the rd-side word in the
 * low half: rd and rd + 1 receive words 0 and 1 of regs[rd]/regs[rm], while
 * rm and rm + 1 receive words 0 and 1 of regs[rd + 1]/regs[rm + 1].
 */
void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every input before any register is overwritten. */
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    uint64_t new_d_lo;
    uint64_t new_d_hi;
    uint64_t new_m_lo;
    uint64_t new_m_hi;

    /* Start from the rm-side word, move it to the top half, then merge the
     * rd-side word into the bottom half.
     */
    new_d_lo = ELEM(m_lo, 0, 32);
    new_d_lo = (new_d_lo << 32) | ELEM(d_lo, 0, 32);

    new_d_hi = ELEM(m_lo, 1, 32);
    new_d_hi = (new_d_hi << 32) | ELEM(d_lo, 1, 32);

    new_m_lo = ELEM(m_hi, 0, 32);
    new_m_lo = (new_m_lo << 32) | ELEM(d_hi, 0, 32);

    new_m_hi = ELEM(m_hi, 1, 32);
    new_m_hi = (new_m_hi << 32) | ELEM(d_hi, 1, 32);

    /* Store order is rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m_lo);
    env->vfp.regs[rm + 1] = make_float64(new_m_hi);
    env->vfp.regs[rd] = make_float64(new_d_lo);
    env->vfp.regs[rd + 1] = make_float64(new_d_hi);
}
1854

  
1855
/*
 * Zip helper for 8-bit elements on single 64-bit registers.
 *
 * Byte i of regs[rd] and byte i of regs[rm] are packed as adjacent bytes
 * (rd-side in the lower byte).  The pairs for elements 0-3 are written to
 * rd and the pairs for elements 4-7 are written to rm.
 */
void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Read both inputs before either register is overwritten. */
    const uint64_t d_in = float64_val(env->vfp.regs[rd]);
    const uint64_t m_in = float64_val(env->vfp.regs[rm]);
    uint64_t d_out = 0;
    uint64_t m_out = 0;
    int pair;

    /* Each 64-bit result holds four 16-bit (d, m) byte pairs. */
    for (pair = 0; pair < 4; pair++) {
        const int lo_idx = pair;
        const int hi_idx = pair + 4;
        const int shift = pair * 16;

        d_out |= (ELEM(d_in, lo_idx, 8) | (ELEM(m_in, lo_idx, 8) << 8))
            << shift;
        m_out |= (ELEM(d_in, hi_idx, 8) | (ELEM(m_in, hi_idx, 8) << 8))
            << shift;
    }

    /* rm is stored first, then rd. */
    env->vfp.regs[rm] = make_float64(m_out);
    env->vfp.regs[rd] = make_float64(d_out);
}
1870

  
1871
/*
 * Zip helper for 16-bit elements on single 64-bit registers.
 *
 * Halfword i of regs[rd] and halfword i of regs[rm] are packed as adjacent
 * halfwords (rd-side in the lower one).  The pairs for elements 0-1 are
 * written to rd and the pairs for elements 2-3 are written to rm.
 */
void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Read both inputs before either register is overwritten. */
    const uint64_t d_in = float64_val(env->vfp.regs[rd]);
    const uint64_t m_in = float64_val(env->vfp.regs[rm]);
    uint64_t d_out = 0;
    uint64_t m_out = 0;
    int pair;

    /* Each 64-bit result holds two 32-bit (d, m) halfword pairs. */
    for (pair = 0; pair < 2; pair++) {
        const int lo_idx = pair;
        const int hi_idx = pair + 2;
        const int shift = pair * 32;

        d_out |= (ELEM(d_in, lo_idx, 16) | (ELEM(m_in, lo_idx, 16) << 16))
            << shift;
        m_out |= (ELEM(d_in, hi_idx, 16) | (ELEM(m_in, hi_idx, 16) << 16))
            << shift;
    }

    /* rm is stored first, then rd. */
    env->vfp.regs[rm] = make_float64(m_out);
    env->vfp.regs[rd] = make_float64(d_out);
}
b/target-arm/translate.c
3653 3653
    return 0;
3654 3654
}
3655 3655

  
3656
/*
 * Emit TCG ops that zip the bytes of two 32-bit values in place.
 *
 *   Value  Before       After
 *   t0     A3 A2 A1 A0  B1 A1 B0 A0
 *   t1     B3 B2 B1 B0  B3 A3 B2 A2
 *
 * t0: first operand; receives the zipped low halves.
 * t1: second operand; receives the zipped high halves.
 *
 * Three temporaries are allocated with new_tmp() and released with
 * dead_tmp() before returning.
 */
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
{
    TCGv rd, rm, tmp;

    rd = new_tmp();
    rm = new_tmp();
    tmp = new_tmp();

    /* rd = B1 A1 B0 A0 */
    tcg_gen_andi_i32(rd, t0, 0xff);             /* A0 stays in byte 0 */
    tcg_gen_shli_i32(tmp, t1, 8);               /* B0: byte 0 -> byte 1 */
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rd, rd, tmp);
    /* A1 lives in byte 1, so it moves up by 8 bits; shifting by 16 would
     * place A0 in byte 2 a second time.
     */
    tcg_gen_shli_i32(tmp, t0, 8);               /* A1: byte 1 -> byte 2 */
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rd, rd, tmp);
    /* Likewise B1 moves up by 16 bits; shifting by 24 would repeat B0. */
    tcg_gen_shli_i32(tmp, t1, 16);              /* B1: byte 1 -> byte 3 */
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
    tcg_gen_or_i32(rd, rd, tmp);

    /* rm = B3 A3 B2 A2 */
    tcg_gen_andi_i32(rm, t1, 0xff000000);       /* B3 stays in byte 3 */
    tcg_gen_shri_i32(tmp, t0, 8);               /* A3: byte 3 -> byte 2 */
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t1, 8);               /* B2: byte 2 -> byte 1 */
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t0, 16);              /* A2: byte 2 -> byte 0 */
    tcg_gen_andi_i32(tmp, tmp, 0xff);

    /* t0 and t1 are only overwritten after every read of them above. */
    tcg_gen_or_i32(t1, rm, tmp);
    tcg_gen_mov_i32(t0, rd);

    dead_tmp(tmp);
    dead_tmp(rm);
    dead_tmp(rd);
}
3691

  
3692
/* NOTE(review): this span is taken from a side-by-side diff rendering.  The
 * bare numbers are diff line numbers, and the lines appear to interleave the
 * removed gen_neon_zip_u16(t0, t1) with the added
 * gen_neon_zip(rd, rm, size, q), which share the brace and declaration
 * lines -- confirm against the unified diff before editing.
 */
static void gen_neon_zip_u16(TCGv t0, TCGv t1)
3656
static int gen_neon_zip(int rd, int rm, int size, int q)
3693 3657
{
3694 3658
    TCGv tmp, tmp2;
3695

  
3696
    tmp = new_tmp();
3697
    tmp2 = new_tmp();
3698

  
3699
    /* tmp = (t0 & 0xffff) | (t1 << 16) */
    tcg_gen_andi_i32(tmp, t0, 0xffff);
3700
    tcg_gen_shli_i32(tmp2, t1, 16);
3701
    tcg_gen_or_i32(tmp, tmp, tmp2);
3702
    /* t1 = (t1 & 0xffff0000) | (t0 >> 16) */
    tcg_gen_andi_i32(t1, t1, 0xffff0000);
3703
    tcg_gen_shri_i32(tmp2, t0, 16);
3704
    tcg_gen_or_i32(t1, t1, tmp2);
3705
    /* t0 is replaced only after its last read above. */
    tcg_gen_mov_i32(t0, tmp);
3706

  
3707
    dead_tmp(tmp2);
3708
    dead_tmp(tmp);
3659
    /* Return 1, emitting nothing, for size 3 and for size 2 when q is zero. */
    if (size == 3 || (!q && size == 2)) {
3660
        return 1;
3661
    }
3662
    /* The register numbers are handed to the helper as 32-bit constants. */
    tmp = tcg_const_i32(rd);
3663
    tmp2 = tcg_const_i32(rm);
3664
    /* Non-zero q selects the neon_qzip* helpers, otherwise neon_zip*. */
    if (q) {
3665
        switch (size) {
3666
        case 0:
3667
            gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
3668
            break;
3669
        case 1:
3670
            gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
3671
            break;
3672
        case 2:
3673
            gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
3674
            break;
3675
        default:
3676
            abort();
3677
        }
3678
    } else {
3679
        switch (size) {
3680
        case 0:
3681
            gen_helper_neon_zip8(cpu_env, tmp, tmp2);
3682
            break;
3683
        case 1:
3684
            gen_helper_neon_zip16(cpu_env, tmp, tmp2);
3685
            break;
3686
        default:
3687
            abort();
3688
        }
3689
    }
3690
    /* Free both constants once the helper call has been emitted. */
    tcg_temp_free_i32(tmp);
3691
    tcg_temp_free_i32(tmp2);
3692
    /* 0 is returned once a helper call has been emitted. */
    return 0;
3709 3693
}
3710 3694

  
3711 3695
static void gen_neon_trn_u8(TCGv t0, TCGv t1)
......
5429 5413
                    }
5430 5414
                    break;
5431 5415
                case 35: /* VZIP */
5432
                    /* Reg  Before       After
5433
                       Rd   A3 A2 A1 A0  B1 A1 B0 A0
5434
                       Rm   B3 B2 B1 B0  B3 A3 B2 A2
5435
                     */
5436
                    if (size == 3)
5416
                    if (gen_neon_zip(rd, rm, size, q)) {
5437 5417
                        return 1;
5438
                    count = (q ? 4 : 2);
5439
                    for (n = 0; n < count; n++) {
5440
                        tmp = neon_load_reg(rd, n);
5441
                        tmp2 = neon_load_reg(rd, n);
5442
                        switch (size) {
5443
                        case 0: gen_neon_zip_u8(tmp, tmp2); break;
5444
                        case 1: gen_neon_zip_u16(tmp, tmp2); break;
5445
                        case 2: /* no-op */; break;
5446
                        default: abort();
5447
                        }
5448
                        neon_store_scratch(n * 2, tmp);
5449
                        neon_store_scratch(n * 2 + 1, tmp2);
5450
                    }
5451
                    for (n = 0; n < count * 2; n++) {
5452
                        int reg = (n < count) ? rd : rm;
5453
                        tmp = neon_load_scratch(n);
5454
                        neon_store_reg(reg, n % count, tmp);
5455 5418
                    }
5456 5419
                    break;
5457 5420
                case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */

Also available in: Unified diff