Revision d68a6f3a
b/target-arm/helpers.h | ||
---|---|---|
466 | 466 |
/*
 * NEON unzip/zip helper declarations.  Every entry declares a helper that
 * returns void and takes env plus two i32 arguments.  For the zip entries
 * the matching definitions have the C signature
 * (CPUState *env, uint32_t rd, uint32_t rm).
 */
DEF_HELPER_3(neon_qunzip8, void, env, i32, i32) |
467 | 467 |
DEF_HELPER_3(neon_qunzip16, void, env, i32, i32) |
468 | 468 |
DEF_HELPER_3(neon_qunzip32, void, env, i32, i32) |
469 |
DEF_HELPER_3(neon_zip8, void, env, i32, i32) |
|
470 |
DEF_HELPER_3(neon_zip16, void, env, i32, i32) |
|
471 |
DEF_HELPER_3(neon_qzip8, void, env, i32, i32) |
|
472 |
DEF_HELPER_3(neon_qzip16, void, env, i32, i32) |
|
473 |
DEF_HELPER_3(neon_qzip32, void, env, i32, i32) |
|
469 | 474 |
|
470 | 475 |
#include "def-helper.h" |
b/target-arm/neon_helper.c | ||
---|---|---|
1787 | 1787 |
env->vfp.regs[rm] = make_float64(m0); |
1788 | 1788 |
env->vfp.regs[rd] = make_float64(d0); |
1789 | 1789 |
} |
1790 |
|
|
1791 |
/*
 * Zip the 8-bit lanes of the register pair (rd, rd + 1) with those of the
 * pair (rm, rm + 1).
 *
 * Treating each pair as sixteen lanes, lane i of the rd pair and lane i of
 * the rm pair form one 16-bit unit with the rd lane in the low byte.  Units
 * 0..7 are stored to the rd pair and units 8..15 to the rm pair.
 *
 * Assumes ELEM(v, n, size) yields the n-th size-bit field of v (its
 * definition is not visible in this hunk).
 */
void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src_d[2], src_m[2];
    uint64_t zipped[4] = { 0, 0, 0, 0 };
    int lane;

    /* Fetch every source doubleword before any register is overwritten. */
    src_m[0] = float64_val(env->vfp.regs[rm]);
    src_m[1] = float64_val(env->vfp.regs[rm + 1]);
    src_d[0] = float64_val(env->vfp.regs[rd]);
    src_d[1] = float64_val(env->vfp.regs[rd + 1]);

    for (lane = 0; lane < 16; lane++) {
        uint64_t from_d = src_d[lane / 8];
        uint64_t from_m = src_m[lane / 8];
        int idx = lane % 8;
        uint64_t unit = ELEM(from_d, idx, 8) | (ELEM(from_m, idx, 8) << 8);

        /* Four 16-bit units fill one result doubleword. */
        zipped[lane / 4] |= unit << ((lane % 4) * 16);
    }

    /* Store order matters if the register ranges overlap: rm pair first. */
    env->vfp.regs[rm] = make_float64(zipped[2]);
    env->vfp.regs[rm + 1] = make_float64(zipped[3]);
    env->vfp.regs[rd] = make_float64(zipped[0]);
    env->vfp.regs[rd + 1] = make_float64(zipped[1]);
}
|
1818 |
|
|
1819 |
/*
 * Zip the 16-bit lanes of the register pair (rd, rd + 1) with those of the
 * pair (rm, rm + 1).
 *
 * Treating each pair as eight lanes, lane i of the rd pair and lane i of
 * the rm pair form one 32-bit unit with the rd lane in the low half.  Units
 * 0..3 are stored to the rd pair and units 4..7 to the rm pair.
 *
 * Assumes ELEM(v, n, size) yields the n-th size-bit field of v (its
 * definition is not visible in this hunk).
 */
void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src_d[2], src_m[2];
    uint64_t zipped[4] = { 0, 0, 0, 0 };
    int lane;

    /* Fetch every source doubleword before any register is overwritten. */
    src_m[0] = float64_val(env->vfp.regs[rm]);
    src_m[1] = float64_val(env->vfp.regs[rm + 1]);
    src_d[0] = float64_val(env->vfp.regs[rd]);
    src_d[1] = float64_val(env->vfp.regs[rd + 1]);

    for (lane = 0; lane < 8; lane++) {
        uint64_t from_d = src_d[lane / 4];
        uint64_t from_m = src_m[lane / 4];
        int idx = lane % 4;
        uint64_t unit = ELEM(from_d, idx, 16) | (ELEM(from_m, idx, 16) << 16);

        /* Two 32-bit units fill one result doubleword. */
        zipped[lane / 2] |= unit << ((lane % 2) * 32);
    }

    /* Store order matters if the register ranges overlap: rm pair first. */
    env->vfp.regs[rm] = make_float64(zipped[2]);
    env->vfp.regs[rm + 1] = make_float64(zipped[3]);
    env->vfp.regs[rd] = make_float64(zipped[0]);
    env->vfp.regs[rd + 1] = make_float64(zipped[1]);
}
|
1838 |
|
|
1839 |
/*
 * Zip the 32-bit lanes of the register pair (rd, rd + 1) with those of the
 * pair (rm, rm + 1).
 *
 * Treating each pair as four lanes, result doubleword i holds lane i of the
 * rd pair in its low half and lane i of the rm pair in its high half.
 * Doublewords 0..1 are stored to the rd pair and 2..3 to the rm pair.
 *
 * Assumes ELEM(v, n, size) yields the n-th size-bit field of v (its
 * definition is not visible in this hunk).
 */
void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src_d[2], src_m[2];
    uint64_t zipped[4];
    int lane;

    /* Fetch every source doubleword before any register is overwritten. */
    src_m[0] = float64_val(env->vfp.regs[rm]);
    src_m[1] = float64_val(env->vfp.regs[rm + 1]);
    src_d[0] = float64_val(env->vfp.regs[rd]);
    src_d[1] = float64_val(env->vfp.regs[rd + 1]);

    for (lane = 0; lane < 4; lane++) {
        uint64_t from_d = src_d[lane / 2];
        uint64_t from_m = src_m[lane / 2];
        int idx = lane % 2;

        zipped[lane] = ELEM(from_d, idx, 32) | (ELEM(from_m, idx, 32) << 32);
    }

    /* Store order matters if the register ranges overlap: rm pair first. */
    env->vfp.regs[rm] = make_float64(zipped[2]);
    env->vfp.regs[rm + 1] = make_float64(zipped[3]);
    env->vfp.regs[rd] = make_float64(zipped[0]);
    env->vfp.regs[rd + 1] = make_float64(zipped[1]);
}
|
1854 |
|
|
1855 |
/*
 * Zip the 8-bit lanes of regs[rd] with those of regs[rm].
 *
 * Lane i of rd and lane i of rm form one 16-bit unit with the rd lane in
 * the low byte.  Units 0..3 are stored to regs[rd], units 4..7 to regs[rm].
 *
 * Assumes ELEM(v, n, size) yields the n-th size-bit field of v (its
 * definition is not visible in this hunk).
 */
void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Both sources are read before either register is overwritten. */
    const uint64_t from_m = float64_val(env->vfp.regs[rm]);
    const uint64_t from_d = float64_val(env->vfp.regs[rd]);
    uint64_t zipped[2] = { 0, 0 };
    int lane;

    for (lane = 0; lane < 8; lane++) {
        uint64_t unit = ELEM(from_d, lane, 8) | (ELEM(from_m, lane, 8) << 8);

        /* Four 16-bit units fill one result doubleword. */
        zipped[lane / 4] |= unit << ((lane % 4) * 16);
    }

    /* rm is written first, then rd. */
    env->vfp.regs[rm] = make_float64(zipped[1]);
    env->vfp.regs[rd] = make_float64(zipped[0]);
}
|
1870 |
|
|
1871 |
/*
 * Zip the 16-bit lanes of regs[rd] with those of regs[rm].
 *
 * Lane i of rd and lane i of rm form one 32-bit unit with the rd lane in
 * the low half.  Units 0..1 are stored to regs[rd], units 2..3 to regs[rm].
 *
 * Assumes ELEM(v, n, size) yields the n-th size-bit field of v (its
 * definition is not visible in this hunk).
 */
void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Both sources are read before either register is overwritten. */
    const uint64_t from_m = float64_val(env->vfp.regs[rm]);
    const uint64_t from_d = float64_val(env->vfp.regs[rd]);
    uint64_t zipped[2] = { 0, 0 };
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint64_t unit = ELEM(from_d, lane, 16) | (ELEM(from_m, lane, 16) << 16);

        /* Two 32-bit units fill one result doubleword. */
        zipped[lane / 2] |= unit << ((lane % 2) * 32);
    }

    /* rm is written first, then rd. */
    env->vfp.regs[rm] = make_float64(zipped[1]);
    env->vfp.regs[rd] = make_float64(zipped[0]);
}
b/target-arm/translate.c | ||
---|---|---|
3653 | 3653 |
return 0; |
3654 | 3654 |
} |
3655 | 3655 |
|
3656 |
/*
 * Emit TCG ops that zip the byte lanes of two 32-bit values in place.
 *
 *   Value  Before        After
 *   t0     A3 A2 A1 A0   B1 A1 B0 A0
 *   t1     B3 B2 B1 B0   B3 A3 B2 A2
 *
 * Both t0 and t1 are overwritten with the results.  Three temporaries are
 * allocated with new_tmp() and released with dead_tmp() before returning.
 */
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
{
    TCGv rd, rm, tmp;

    rd = new_tmp();
    rm = new_tmp();
    tmp = new_tmp();

    /* rd = B1 A1 B0 A0: the low two bytes of each input, interleaved. */
    tcg_gen_andi_i32(rd, t0, 0xff);             /* A0 stays in byte 0 */
    tcg_gen_shli_i32(tmp, t1, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff00);         /* B0 -> byte 1 */
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shli_i32(tmp, t0, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);       /* A1 -> byte 2 */
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shli_i32(tmp, t1, 16);
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);     /* B1 -> byte 3 */
    tcg_gen_or_i32(rd, rd, tmp);

    /* rm = B3 A3 B2 A2: the high two bytes of each input, interleaved. */
    tcg_gen_andi_i32(rm, t1, 0xff000000);       /* B3 stays in byte 3 */
    tcg_gen_shri_i32(tmp, t0, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);       /* A3 -> byte 2 */
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t1, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff00);         /* B2 -> byte 1 */
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t0, 16);
    tcg_gen_andi_i32(tmp, tmp, 0xff);           /* A2 -> byte 0 */

    /* t1 is written only after its last use as an input; t0 likewise. */
    tcg_gen_or_i32(t1, rm, tmp);
    tcg_gen_mov_i32(t0, rd);

    dead_tmp(tmp);
    dead_tmp(rm);
    dead_tmp(rd);
}
|
3691 |
|
|
3692 |
/*
 * NOTE(review): two definitions are interleaved in this span; they share
 * the braces and the "TCGv tmp, tmp2;" declaration.
 *
 * gen_neon_zip_u16(t0, t1) owns the statements that use t0/t1, new_tmp()
 * and dead_tmp().  With the incoming values of t0 and t1 it emits ops for
 *     t0 = (t0 & 0xffff) | (t1 << 16)
 *     t1 = (t1 & 0xffff0000) | (t0 >> 16)
 *
 * gen_neon_zip(rd, rm, size, q) owns the statements from the size/q check
 * onward; see the comment ahead of its signature line.
 */
static void gen_neon_zip_u16(TCGv t0, TCGv t1) |

3656 |
/*
 * gen_neon_zip: emit a call to the zip helper selected by size and q,
 * passing rd and rm as constant i32 operands.  Returns 1 without emitting
 * anything when size == 3, or when size == 2 and q is zero; otherwise
 * returns 0 after emitting the call and freeing the two constant temps.
 */
static int gen_neon_zip(int rd, int rm, int size, int q) |

3693 | 3657 |
{ |
3694 | 3658 |
TCGv tmp, tmp2; |
3695 |


3696 |
tmp = new_tmp(); |

3697 |
tmp2 = new_tmp(); |

3698 |


3699 |
tcg_gen_andi_i32(tmp, t0, 0xffff); |

3700 |
tcg_gen_shli_i32(tmp2, t1, 16); |

3701 |
tcg_gen_or_i32(tmp, tmp, tmp2); |

3702 |
tcg_gen_andi_i32(t1, t1, 0xffff0000); |

3703 |
tcg_gen_shri_i32(tmp2, t0, 16); |

3704 |
tcg_gen_or_i32(t1, t1, tmp2); |

3705 |
tcg_gen_mov_i32(t0, tmp); |

3706 |


3707 |
dead_tmp(tmp2); |

3708 |
dead_tmp(tmp); |

3659 |
/* Combinations rejected here never reach the abort() defaults below. */
if (size == 3 || (!q && size == 2)) { |

3660 |
return 1; |

3661 |
} |

3662 |
tmp = tcg_const_i32(rd); |

3663 |
tmp2 = tcg_const_i32(rm); |

3664 |
if (q) { |

3665 |
switch (size) { |

3666 |
case 0: |

3667 |
gen_helper_neon_qzip8(cpu_env, tmp, tmp2); |

3668 |
break; |

3669 |
case 1: |

3670 |
gen_helper_neon_qzip16(cpu_env, tmp, tmp2); |

3671 |
break; |

3672 |
case 2: |

3673 |
gen_helper_neon_qzip32(cpu_env, tmp, tmp2); |

3674 |
break; |

3675 |
default: |

3676 |
abort(); |

3677 |
} |

3678 |
} else { |

3679 |
switch (size) { |

3680 |
case 0: |

3681 |
gen_helper_neon_zip8(cpu_env, tmp, tmp2); |

3682 |
break; |

3683 |
case 1: |

3684 |
gen_helper_neon_zip16(cpu_env, tmp, tmp2); |

3685 |
break; |

3686 |
default: |

3687 |
abort(); |

3688 |
} |

3689 |
} |

3690 |
tcg_temp_free_i32(tmp); |

3691 |
tcg_temp_free_i32(tmp2); |

3692 |
return 0; |

3709 | 3693 |
} |
3710 | 3694 |
|
3711 | 3695 |
static void gen_neon_trn_u8(TCGv t0, TCGv t1) |
... | ... | |
5429 | 5413 |
} |
5430 | 5414 |
break; |
5431 | 5415 |
case 35: /* VZIP */ |
5432 |
/* Reg Before After |
|
5433 |
Rd A3 A2 A1 A0 B1 A1 B0 A0 |
|
5434 |
Rm B3 B2 B1 B0 B3 A3 B2 A2 |
|
5435 |
*/ |
|
5436 |
if (size == 3) |
|
5416 |
if (gen_neon_zip(rd, rm, size, q)) { |
|
5437 | 5417 |
return 1; |
5438 |
count = (q ? 4 : 2); |
|
5439 |
for (n = 0; n < count; n++) { |
|
5440 |
tmp = neon_load_reg(rd, n); |
|
5441 |
tmp2 = neon_load_reg(rd, n); |
|
5442 |
switch (size) { |
|
5443 |
case 0: gen_neon_zip_u8(tmp, tmp2); break; |
|
5444 |
case 1: gen_neon_zip_u16(tmp, tmp2); break; |
|
5445 |
case 2: /* no-op */; break; |
|
5446 |
default: abort(); |
|
5447 |
} |
|
5448 |
neon_store_scratch(n * 2, tmp); |
|
5449 |
neon_store_scratch(n * 2 + 1, tmp2); |
|
5450 |
} |
|
5451 |
for (n = 0; n < count * 2; n++) { |
|
5452 |
int reg = (n < count) ? rd : rm; |
|
5453 |
tmp = neon_load_scratch(n); |
|
5454 |
neon_store_reg(reg, n % count, tmp); |
|
5455 | 5418 |
} |
5456 | 5419 |
break; |
5457 | 5420 |
case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ |
Also available in: Unified diff