Revision 02acedf9
b/target-arm/helpers.h | ||
---|---|---|
461 | 461 |
|
462 | 462 |
DEF_HELPER_2(set_teecr, void, env, i32) |
463 | 463 |
|
464 |
DEF_HELPER_3(neon_unzip8, void, env, i32, i32) |
|
465 |
DEF_HELPER_3(neon_unzip16, void, env, i32, i32) |
|
466 |
DEF_HELPER_3(neon_qunzip8, void, env, i32, i32) |
|
467 |
DEF_HELPER_3(neon_qunzip16, void, env, i32, i32) |
|
468 |
DEF_HELPER_3(neon_qunzip32, void, env, i32, i32) |
|
469 |
|
|
464 | 470 |
#include "def-helper.h" |
b/target-arm/neon_helper.c | ||
---|---|---|
1693 | 1693 |
float32 f1 = float32_abs(vfp_itos(b)); |
1694 | 1694 |
return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; |
1695 | 1695 |
} |
1696 |
|
|
1697 |
/*
 * Extract element N (counting from the least significant end) of width
 * SIZE bits from the 64-bit value V, zero-extended.
 * SIZE must be less than 64: the mask is built with a 64-bit shift.
 */
#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
|
1698 |
|
|
1699 |
/*
 * Unzip (de-interleave) 8-bit elements, quadword form.
 *
 * rd and rm index env->vfp.regs; each operand spans two consecutive
 * 64-bit entries (rd, rd + 1 and rm, rm + 1).  Taking the four source
 * words in the order rd, rd + 1, rm, rm + 1, the even-numbered bytes of
 * each word are packed into the rd pair and the odd-numbered bytes into
 * the rm pair.
 */
void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t d0 = 0, d1 = 0, m0 = 0, m1 = 0;
    int i;

    /* Each source word contributes four bytes to one half of a result. */
    for (i = 0; i < 4; i++) {
        int lo = 8 * i;         /* destination bit offset, low half */
        int hi = 32 + 8 * i;    /* destination bit offset, high half */

        d0 |= (ELEM(zd0, 2 * i, 8) << lo) | (ELEM(zd1, 2 * i, 8) << hi);
        d1 |= (ELEM(zm0, 2 * i, 8) << lo) | (ELEM(zm1, 2 * i, 8) << hi);
        m0 |= (ELEM(zd0, 2 * i + 1, 8) << lo)
            | (ELEM(zd1, 2 * i + 1, 8) << hi);
        m1 |= (ELEM(zm0, 2 * i + 1, 8) << lo)
            | (ELEM(zm1, 2 * i + 1, 8) << hi);
    }

    /* rm is written before rd, so the rd results win if the indices overlap. */
    env->vfp.regs[rm] = make_float64(m0);
    env->vfp.regs[rm + 1] = make_float64(m1);
    env->vfp.regs[rd] = make_float64(d0);
    env->vfp.regs[rd + 1] = make_float64(d1);
}
|
1726 |
|
|
1727 |
/*
 * Unzip (de-interleave) 16-bit elements, quadword form.
 *
 * rd and rm index env->vfp.regs; each operand spans two consecutive
 * 64-bit entries (rd, rd + 1 and rm, rm + 1).  Taking the four source
 * words in the order rd, rd + 1, rm, rm + 1, the even-numbered halfwords
 * of each word are packed into the rd pair and the odd-numbered halfwords
 * into the rm pair.
 */
void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t d0 = 0, d1 = 0, m0 = 0, m1 = 0;
    int i;

    /* Each source word contributes two halfwords to one half of a result. */
    for (i = 0; i < 2; i++) {
        int lo = 16 * i;        /* destination bit offset, low half */
        int hi = 32 + 16 * i;   /* destination bit offset, high half */

        d0 |= (ELEM(zd0, 2 * i, 16) << lo) | (ELEM(zd1, 2 * i, 16) << hi);
        d1 |= (ELEM(zm0, 2 * i, 16) << lo) | (ELEM(zm1, 2 * i, 16) << hi);
        m0 |= (ELEM(zd0, 2 * i + 1, 16) << lo)
            | (ELEM(zd1, 2 * i + 1, 16) << hi);
        m1 |= (ELEM(zm0, 2 * i + 1, 16) << lo)
            | (ELEM(zm1, 2 * i + 1, 16) << hi);
    }

    /* rm is written before rd, so the rd results win if the indices overlap. */
    env->vfp.regs[rm] = make_float64(m0);
    env->vfp.regs[rm + 1] = make_float64(m1);
    env->vfp.regs[rd] = make_float64(d0);
    env->vfp.regs[rd + 1] = make_float64(d1);
}
|
1746 |
|
|
1747 |
/*
 * Unzip (de-interleave) 32-bit elements, quadword form.
 *
 * rd and rm index env->vfp.regs; each operand spans two consecutive
 * 64-bit entries (rd, rd + 1 and rm, rm + 1).  The low word of each
 * source entry goes to the rd pair and the high word to the rm pair:
 *   rd     = { zd0[0], zd1[0] }    rd + 1 = { zm0[0], zm1[0] }
 *   rm     = { zd0[1], zd1[1] }    rm + 1 = { zm0[1], zm1[1] }
 */
void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm0 = float64_val(env->vfp.regs[rm]);
    uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t zd0 = float64_val(env->vfp.regs[rd]);
    uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32);
    uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32);
    uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32);
    uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32);

    /* rm is written before rd, so the rd results win if the indices overlap. */
    env->vfp.regs[rm] = make_float64(m0);
    env->vfp.regs[rm + 1] = make_float64(m1);
    env->vfp.regs[rd] = make_float64(d0);
    env->vfp.regs[rd + 1] = make_float64(d1);
}
|
1762 |
|
|
1763 |
/*
 * Unzip (de-interleave) 8-bit elements, doubleword form.
 *
 * rd and rm index env->vfp.regs; each operand is a single 64-bit entry.
 * The even-numbered bytes of rd followed by the even-numbered bytes of rm
 * are written to rd; the odd-numbered bytes of rd followed by the
 * odd-numbered bytes of rm are written to rm.
 */
void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm = float64_val(env->vfp.regs[rm]);
    uint64_t zd = float64_val(env->vfp.regs[rd]);
    uint64_t d0 = 0, m0 = 0;
    int i;

    /* zd fills the low 32 bits of each result, zm the high 32 bits. */
    for (i = 0; i < 4; i++) {
        int lo = 8 * i;
        int hi = 32 + 8 * i;

        d0 |= (ELEM(zd, 2 * i, 8) << lo) | (ELEM(zm, 2 * i, 8) << hi);
        m0 |= (ELEM(zd, 2 * i + 1, 8) << lo)
            | (ELEM(zm, 2 * i + 1, 8) << hi);
    }

    /* rm is written before rd, so the rd result wins if rd == rm. */
    env->vfp.regs[rm] = make_float64(m0);
    env->vfp.regs[rd] = make_float64(d0);
}
|
1778 |
|
|
1779 |
/*
 * Unzip (de-interleave) 16-bit elements, doubleword form.
 *
 * rd and rm index env->vfp.regs; each operand is a single 64-bit entry.
 * The even-numbered halfwords of rd followed by the even-numbered
 * halfwords of rm are written to rd; the odd-numbered halfwords of rd
 * followed by the odd-numbered halfwords of rm are written to rm.
 */
void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    uint64_t zm = float64_val(env->vfp.regs[rm]);
    uint64_t zd = float64_val(env->vfp.regs[rd]);
    uint64_t d0 = 0, m0 = 0;
    int i;

    /* zd fills the low 32 bits of each result, zm the high 32 bits. */
    for (i = 0; i < 2; i++) {
        int lo = 16 * i;
        int hi = 32 + 16 * i;

        d0 |= (ELEM(zd, 2 * i, 16) << lo) | (ELEM(zm, 2 * i, 16) << hi);
        m0 |= (ELEM(zd, 2 * i + 1, 16) << lo)
            | (ELEM(zm, 2 * i + 1, 16) << hi);
    }

    /* rm is written before rd, so the rd result wins if rd == rm. */
    env->vfp.regs[rm] = make_float64(m0);
    env->vfp.regs[rd] = make_float64(d0);
}
b/target-arm/translate.c | ||
---|---|---|
3614 | 3614 |
return tmp; |
3615 | 3615 |
} |
3616 | 3616 |
|
3617 |
static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
|
|
3617 |
static int gen_neon_unzip(int rd, int rm, int size, int q)
|
|
3618 | 3618 |
{ |
3619 |
TCGv rd, rm, tmp; |
|
3620 |
|
|
3621 |
rd = new_tmp(); |
|
3622 |
rm = new_tmp(); |
|
3623 |
tmp = new_tmp(); |
|
3624 |
|
|
3625 |
tcg_gen_andi_i32(rd, t0, 0xff); |
|
3626 |
tcg_gen_shri_i32(tmp, t0, 8); |
|
3627 |
tcg_gen_andi_i32(tmp, tmp, 0xff00); |
|
3628 |
tcg_gen_or_i32(rd, rd, tmp); |
|
3629 |
tcg_gen_shli_i32(tmp, t1, 16); |
|
3630 |
tcg_gen_andi_i32(tmp, tmp, 0xff0000); |
|
3631 |
tcg_gen_or_i32(rd, rd, tmp); |
|
3632 |
tcg_gen_shli_i32(tmp, t1, 8); |
|
3633 |
tcg_gen_andi_i32(tmp, tmp, 0xff000000); |
|
3634 |
tcg_gen_or_i32(rd, rd, tmp); |
|
3635 |
|
|
3636 |
tcg_gen_shri_i32(rm, t0, 8); |
|
3637 |
tcg_gen_andi_i32(rm, rm, 0xff); |
|
3638 |
tcg_gen_shri_i32(tmp, t0, 16); |
|
3639 |
tcg_gen_andi_i32(tmp, tmp, 0xff00); |
|
3640 |
tcg_gen_or_i32(rm, rm, tmp); |
|
3641 |
tcg_gen_shli_i32(tmp, t1, 8); |
|
3642 |
tcg_gen_andi_i32(tmp, tmp, 0xff0000); |
|
3643 |
tcg_gen_or_i32(rm, rm, tmp); |
|
3644 |
tcg_gen_andi_i32(tmp, t1, 0xff000000); |
|
3645 |
tcg_gen_or_i32(t1, rm, tmp); |
|
3646 |
tcg_gen_mov_i32(t0, rd); |
|
3647 |
|
|
3648 |
dead_tmp(tmp); |
|
3649 |
dead_tmp(rm); |
|
3650 |
dead_tmp(rd); |
|
3619 |
TCGv tmp, tmp2; |
|
3620 |
if (size == 3 || (!q && size == 2)) { |
|
3621 |
return 1; |
|
3622 |
} |
|
3623 |
tmp = tcg_const_i32(rd); |
|
3624 |
tmp2 = tcg_const_i32(rm); |
|
3625 |
if (q) { |
|
3626 |
switch (size) { |
|
3627 |
case 0: |
|
3628 |
gen_helper_neon_qunzip8(cpu_env, tmp, tmp2); |
|
3629 |
break; |
|
3630 |
case 1: |
|
3631 |
gen_helper_neon_qunzip16(cpu_env, tmp, tmp2); |
|
3632 |
break; |
|
3633 |
case 2: |
|
3634 |
gen_helper_neon_qunzip32(cpu_env, tmp, tmp2); |
|
3635 |
break; |
|
3636 |
default: |
|
3637 |
abort(); |
|
3638 |
} |
|
3639 |
} else { |
|
3640 |
switch (size) { |
|
3641 |
case 0: |
|
3642 |
gen_helper_neon_unzip8(cpu_env, tmp, tmp2); |
|
3643 |
break; |
|
3644 |
case 1: |
|
3645 |
gen_helper_neon_unzip16(cpu_env, tmp, tmp2); |
|
3646 |
break; |
|
3647 |
default: |
|
3648 |
abort(); |
|
3649 |
} |
|
3650 |
} |
|
3651 |
tcg_temp_free_i32(tmp); |
|
3652 |
tcg_temp_free_i32(tmp2); |
|
3653 |
return 0; |
|
3651 | 3654 |
} |
3652 | 3655 |
|
3653 | 3656 |
static void gen_neon_zip_u8(TCGv t0, TCGv t1) |
... | ... | |
3705 | 3708 |
dead_tmp(tmp); |
3706 | 3709 |
} |
3707 | 3710 |
|
3708 |
static void gen_neon_unzip(int reg, int q, int tmp, int size) |
|
3709 |
{ |
|
3710 |
int n; |
|
3711 |
TCGv t0, t1; |
|
3712 |
|
|
3713 |
for (n = 0; n < q + 1; n += 2) { |
|
3714 |
t0 = neon_load_reg(reg, n); |
|
3715 |
t1 = neon_load_reg(reg, n + 1); |
|
3716 |
switch (size) { |
|
3717 |
case 0: gen_neon_unzip_u8(t0, t1); break; |
|
3718 |
case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same. */ |
|
3719 |
case 2: /* no-op */; break; |
|
3720 |
default: abort(); |
|
3721 |
} |
|
3722 |
neon_store_scratch(tmp + n, t0); |
|
3723 |
neon_store_scratch(tmp + n + 1, t1); |
|
3724 |
} |
|
3725 |
} |
|
3726 |
|
|
3727 | 3711 |
static void gen_neon_trn_u8(TCGv t0, TCGv t1) |
3728 | 3712 |
{ |
3729 | 3713 |
TCGv rd, tmp; |
... | ... | |
5440 | 5424 |
} |
5441 | 5425 |
break; |
5442 | 5426 |
case 34: /* VUZP */ |
5443 |
/* Reg Before After |
|
5444 |
Rd A3 A2 A1 A0 B2 B0 A2 A0 |
|
5445 |
Rm B3 B2 B1 B0 B3 B1 A3 A1 |
|
5446 |
*/ |
|
5447 |
if (size == 3) |
|
5427 |
if (gen_neon_unzip(rd, rm, size, q)) { |
|
5448 | 5428 |
return 1; |
5449 |
gen_neon_unzip(rd, q, 0, size); |
|
5450 |
gen_neon_unzip(rm, q, 4, size); |
|
5451 |
if (q) { |
|
5452 |
static int unzip_order_q[8] = |
|
5453 |
{0, 2, 4, 6, 1, 3, 5, 7}; |
|
5454 |
for (n = 0; n < 8; n++) { |
|
5455 |
int reg = (n < 4) ? rd : rm; |
|
5456 |
tmp = neon_load_scratch(unzip_order_q[n]); |
|
5457 |
neon_store_reg(reg, n % 4, tmp); |
|
5458 |
} |
|
5459 |
} else { |
|
5460 |
static int unzip_order[4] = |
|
5461 |
{0, 4, 1, 5}; |
|
5462 |
for (n = 0; n < 4; n++) { |
|
5463 |
int reg = (n < 2) ? rd : rm; |
|
5464 |
tmp = neon_load_scratch(unzip_order[n]); |
|
5465 |
neon_store_reg(reg, n % 2, tmp); |
|
5466 |
} |
|
5467 | 5429 |
} |
5468 | 5430 |
break; |
5469 | 5431 |
case 35: /* VZIP */ |
Also available in: Unified diff