Revision aa47cfdd
b/target-arm/helper.h | ||
---|---|---|
350 | 350 |
DEF_HELPER_1(neon_qneg_s16, i32, i32) |
351 | 351 |
DEF_HELPER_1(neon_qneg_s32, i32, i32) |
352 | 352 |
|
353 |
DEF_HELPER_2(neon_min_f32, i32, i32, i32) |
|
354 |
DEF_HELPER_2(neon_max_f32, i32, i32, i32) |
|
355 |
DEF_HELPER_2(neon_abd_f32, i32, i32, i32) |
|
356 |
DEF_HELPER_2(neon_add_f32, i32, i32, i32) |
|
357 |
DEF_HELPER_2(neon_sub_f32, i32, i32, i32) |
|
358 |
DEF_HELPER_2(neon_mul_f32, i32, i32, i32) |
|
359 |
DEF_HELPER_2(neon_ceq_f32, i32, i32, i32) |
|
360 |
DEF_HELPER_2(neon_cge_f32, i32, i32, i32) |
|
361 |
DEF_HELPER_2(neon_cgt_f32, i32, i32, i32) |
|
362 |
DEF_HELPER_2(neon_acge_f32, i32, i32, i32) |
|
363 |
DEF_HELPER_2(neon_acgt_f32, i32, i32, i32) |
|
353 |
DEF_HELPER_3(neon_min_f32, i32, i32, i32, ptr) |
|
354 |
DEF_HELPER_3(neon_max_f32, i32, i32, i32, ptr) |
|
355 |
DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) |
|
356 |
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) |
|
357 |
DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) |
|
358 |
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) |
|
359 |
DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) |
|
360 |
DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) |
|
364 | 361 |
|
365 | 362 |
/* iwmmxt_helper.c */ |
366 | 363 |
DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) |
b/target-arm/neon_helper.c | ||
---|---|---|
18 | 18 |
|
19 | 19 |
#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q |
20 | 20 |
|
21 |
#define NFS (&env->vfp.standard_fp_status) |
|
22 |
|
|
23 | 21 |
#define NEON_TYPE1(name, type) \ |
24 | 22 |
typedef struct \ |
25 | 23 |
{ \ |
... | ... | |
1770 | 1768 |
} |
1771 | 1769 |
|
1772 | 1770 |
/* NEON Float helpers. */ |
1773 |
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) |
|
1771 |
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1774 | 1772 |
{ |
1775 |
return float32_val(float32_min(make_float32(a), make_float32(b), NFS)); |
|
1773 |
float_status *fpst = fpstp; |
|
1774 |
return float32_val(float32_min(make_float32(a), make_float32(b), fpst)); |
|
1776 | 1775 |
} |
1777 | 1776 |
|
1778 |
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) |
|
1777 |
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1779 | 1778 |
{ |
1780 |
return float32_val(float32_max(make_float32(a), make_float32(b), NFS)); |
|
1779 |
float_status *fpst = fpstp; |
|
1780 |
return float32_val(float32_max(make_float32(a), make_float32(b), fpst)); |
|
1781 | 1781 |
} |
1782 | 1782 |
|
1783 |
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) |
|
1783 |
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1784 | 1784 |
{ |
1785 |
float_status *fpst = fpstp; |
|
1785 | 1786 |
float32 f0 = make_float32(a); |
1786 | 1787 |
float32 f1 = make_float32(b); |
1787 |
return float32_val(float32_abs(float32_sub(f0, f1, NFS))); |
|
1788 |
} |
|
1789 |
|
|
1790 |
uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) |
|
1791 |
{ |
|
1792 |
return float32_val(float32_add(make_float32(a), make_float32(b), NFS)); |
|
1793 |
} |
|
1794 |
|
|
1795 |
uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) |
|
1796 |
{ |
|
1797 |
return float32_val(float32_sub(make_float32(a), make_float32(b), NFS)); |
|
1798 |
} |
|
1799 |
|
|
1800 |
uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) |
|
1801 |
{ |
|
1802 |
return float32_val(float32_mul(make_float32(a), make_float32(b), NFS)); |
|
1788 |
return float32_val(float32_abs(float32_sub(f0, f1, fpst))); |
|
1803 | 1789 |
} |
1804 | 1790 |
|
1805 | 1791 |
/* Floating point comparisons produce an integer result. |
1806 | 1792 |
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. |
1807 | 1793 |
* Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires. |
1808 | 1794 |
*/ |
1809 |
uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b) |
|
1795 |
uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1810 | 1796 |
{ |
1811 |
return -float32_eq_quiet(make_float32(a), make_float32(b), NFS); |
|
1797 |
float_status *fpst = fpstp; |
|
1798 |
return -float32_eq_quiet(make_float32(a), make_float32(b), fpst); |
|
1812 | 1799 |
} |
1813 | 1800 |
|
1814 |
uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b) |
|
1801 |
uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1815 | 1802 |
{ |
1816 |
return -float32_le(make_float32(b), make_float32(a), NFS); |
|
1803 |
float_status *fpst = fpstp; |
|
1804 |
return -float32_le(make_float32(b), make_float32(a), fpst); |
|
1817 | 1805 |
} |
1818 | 1806 |
|
1819 |
uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b) |
|
1807 |
uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1820 | 1808 |
{ |
1821 |
return -float32_lt(make_float32(b), make_float32(a), NFS); |
|
1809 |
float_status *fpst = fpstp; |
|
1810 |
return -float32_lt(make_float32(b), make_float32(a), fpst); |
|
1822 | 1811 |
} |
1823 | 1812 |
|
1824 |
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) |
|
1813 |
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1825 | 1814 |
{ |
1815 |
float_status *fpst = fpstp; |
|
1826 | 1816 |
float32 f0 = float32_abs(make_float32(a)); |
1827 | 1817 |
float32 f1 = float32_abs(make_float32(b)); |
1828 |
return -float32_le(f1, f0, NFS);
|
|
1818 |
return -float32_le(f1, f0, fpst);
|
|
1829 | 1819 |
} |
1830 | 1820 |
|
1831 |
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) |
|
1821 |
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
|
|
1832 | 1822 |
{ |
1823 |
float_status *fpst = fpstp; |
|
1833 | 1824 |
float32 f0 = float32_abs(make_float32(a)); |
1834 | 1825 |
float32 f1 = float32_abs(make_float32(b)); |
1835 |
return -float32_lt(f1, f0, NFS);
|
|
1826 |
return -float32_lt(f1, f0, fpst);
|
|
1836 | 1827 |
} |
1837 | 1828 |
|
1838 | 1829 |
#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1)) |
b/target-arm/translate.c | ||
---|---|---|
4857 | 4857 |
} |
4858 | 4858 |
break; |
4859 | 4859 |
case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ |
4860 |
{ |
|
4861 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
4860 | 4862 |
switch ((u << 2) | size) { |
4861 | 4863 |
case 0: /* VADD */ |
4862 |
gen_helper_neon_add_f32(tmp, tmp, tmp2); |
|
4864 |
case 4: /* VPADD */ |
|
4865 |
gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); |
|
4863 | 4866 |
break; |
4864 | 4867 |
case 2: /* VSUB */ |
4865 |
gen_helper_neon_sub_f32(tmp, tmp, tmp2); |
|
4866 |
break; |
|
4867 |
case 4: /* VPADD */ |
|
4868 |
gen_helper_neon_add_f32(tmp, tmp, tmp2); |
|
4868 |
gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus); |
|
4869 | 4869 |
break; |
4870 | 4870 |
case 6: /* VABD */ |
4871 |
gen_helper_neon_abd_f32(tmp, tmp, tmp2); |
|
4871 |
gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
|
|
4872 | 4872 |
break; |
4873 | 4873 |
default: |
4874 | 4874 |
abort(); |
4875 | 4875 |
} |
4876 |
tcg_temp_free_ptr(fpstatus); |
|
4876 | 4877 |
break; |
4878 |
} |
|
4877 | 4879 |
case NEON_3R_FLOAT_MULTIPLY: |
4878 |
gen_helper_neon_mul_f32(tmp, tmp, tmp2); |
|
4880 |
{ |
|
4881 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
4882 |
gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); |
|
4879 | 4883 |
if (!u) { |
4880 | 4884 |
tcg_temp_free_i32(tmp2); |
4881 | 4885 |
tmp2 = neon_load_reg(rd, pass); |
4882 | 4886 |
if (size == 0) { |
4883 |
gen_helper_neon_add_f32(tmp, tmp, tmp2);
|
|
4887 |
gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
|
|
4884 | 4888 |
} else { |
4885 |
gen_helper_neon_sub_f32(tmp, tmp2, tmp);
|
|
4889 |
gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
|
|
4886 | 4890 |
} |
4887 | 4891 |
} |
4892 |
tcg_temp_free_ptr(fpstatus); |
|
4888 | 4893 |
break; |
4894 |
} |
|
4889 | 4895 |
case NEON_3R_FLOAT_CMP: |
4896 |
{ |
|
4897 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
4890 | 4898 |
if (!u) { |
4891 |
gen_helper_neon_ceq_f32(tmp, tmp, tmp2); |
|
4899 |
gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
|
|
4892 | 4900 |
} else { |
4893 |
if (size == 0) |
|
4894 |
gen_helper_neon_cge_f32(tmp, tmp, tmp2); |
|
4895 |
else |
|
4896 |
gen_helper_neon_cgt_f32(tmp, tmp, tmp2); |
|
4901 |
if (size == 0) { |
|
4902 |
gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); |
|
4903 |
} else { |
|
4904 |
gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); |
|
4905 |
} |
|
4897 | 4906 |
} |
4907 |
tcg_temp_free_ptr(fpstatus); |
|
4898 | 4908 |
break; |
4909 |
} |
|
4899 | 4910 |
case NEON_3R_FLOAT_ACMP: |
4900 |
if (size == 0) |
|
4901 |
gen_helper_neon_acge_f32(tmp, tmp, tmp2); |
|
4902 |
else |
|
4903 |
gen_helper_neon_acgt_f32(tmp, tmp, tmp2); |
|
4911 |
{ |
|
4912 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
4913 |
if (size == 0) { |
|
4914 |
gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus); |
|
4915 |
} else { |
|
4916 |
gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus); |
|
4917 |
} |
|
4918 |
tcg_temp_free_ptr(fpstatus); |
|
4904 | 4919 |
break; |
4920 |
} |
|
4905 | 4921 |
case NEON_3R_FLOAT_MINMAX: |
4906 |
if (size == 0) |
|
4907 |
gen_helper_neon_max_f32(tmp, tmp, tmp2); |
|
4908 |
else |
|
4909 |
gen_helper_neon_min_f32(tmp, tmp, tmp2); |
|
4922 |
{ |
|
4923 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
4924 |
if (size == 0) { |
|
4925 |
gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus); |
|
4926 |
} else { |
|
4927 |
gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus); |
|
4928 |
} |
|
4929 |
tcg_temp_free_ptr(fpstatus); |
|
4910 | 4930 |
break; |
4931 |
} |
|
4911 | 4932 |
case NEON_3R_VRECPS_VRSQRTS: |
4912 | 4933 |
if (size == 0) |
4913 | 4934 |
gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); |
... | ... | |
5606 | 5627 |
gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); |
5607 | 5628 |
} |
5608 | 5629 |
} else if (op & 1) { |
5609 |
gen_helper_neon_mul_f32(tmp, tmp, tmp2); |
|
5630 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
5631 |
gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); |
|
5632 |
tcg_temp_free_ptr(fpstatus); |
|
5610 | 5633 |
} else { |
5611 | 5634 |
switch (size) { |
5612 | 5635 |
case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; |
... | ... | |
5624 | 5647 |
gen_neon_add(size, tmp, tmp2); |
5625 | 5648 |
break; |
5626 | 5649 |
case 1: |
5627 |
gen_helper_neon_add_f32(tmp, tmp, tmp2); |
|
5650 |
{ |
|
5651 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
5652 |
gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); |
|
5653 |
tcg_temp_free_ptr(fpstatus); |
|
5628 | 5654 |
break; |
5655 |
} |
|
5629 | 5656 |
case 4: |
5630 | 5657 |
gen_neon_rsb(size, tmp, tmp2); |
5631 | 5658 |
break; |
5632 | 5659 |
case 5: |
5633 |
gen_helper_neon_sub_f32(tmp, tmp2, tmp); |
|
5660 |
{ |
|
5661 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
5662 |
gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); |
|
5663 |
tcg_temp_free_ptr(fpstatus); |
|
5634 | 5664 |
break; |
5665 |
} |
|
5635 | 5666 |
default: |
5636 | 5667 |
abort(); |
5637 | 5668 |
} |
... | ... | |
6029 | 6060 |
tcg_temp_free(tmp2); |
6030 | 6061 |
break; |
6031 | 6062 |
case NEON_2RM_VCGT0_F: |
6063 |
{ |
|
6064 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
6032 | 6065 |
tmp2 = tcg_const_i32(0); |
6033 |
gen_helper_neon_cgt_f32(tmp, tmp, tmp2); |
|
6066 |
gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
|
|
6034 | 6067 |
tcg_temp_free(tmp2); |
6068 |
tcg_temp_free_ptr(fpstatus); |
|
6035 | 6069 |
break; |
6070 |
} |
|
6036 | 6071 |
case NEON_2RM_VCGE0_F: |
6072 |
{ |
|
6073 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
6037 | 6074 |
tmp2 = tcg_const_i32(0); |
6038 |
gen_helper_neon_cge_f32(tmp, tmp, tmp2); |
|
6075 |
gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
|
|
6039 | 6076 |
tcg_temp_free(tmp2); |
6077 |
tcg_temp_free_ptr(fpstatus); |
|
6040 | 6078 |
break; |
6079 |
} |
|
6041 | 6080 |
case NEON_2RM_VCEQ0_F: |
6081 |
{ |
|
6082 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
6042 | 6083 |
tmp2 = tcg_const_i32(0); |
6043 |
gen_helper_neon_ceq_f32(tmp, tmp, tmp2); |
|
6084 |
gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
|
|
6044 | 6085 |
tcg_temp_free(tmp2); |
6086 |
tcg_temp_free_ptr(fpstatus); |
|
6045 | 6087 |
break; |
6088 |
} |
|
6046 | 6089 |
case NEON_2RM_VCLE0_F: |
6090 |
{ |
|
6091 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
6047 | 6092 |
tmp2 = tcg_const_i32(0); |
6048 |
gen_helper_neon_cge_f32(tmp, tmp2, tmp); |
|
6093 |
gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
|
|
6049 | 6094 |
tcg_temp_free(tmp2); |
6095 |
tcg_temp_free_ptr(fpstatus); |
|
6050 | 6096 |
break; |
6097 |
} |
|
6051 | 6098 |
case NEON_2RM_VCLT0_F: |
6099 |
{ |
|
6100 |
TCGv_ptr fpstatus = get_fpstatus_ptr(1); |
|
6052 | 6101 |
tmp2 = tcg_const_i32(0); |
6053 |
gen_helper_neon_cgt_f32(tmp, tmp2, tmp); |
|
6102 |
gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
|
|
6054 | 6103 |
tcg_temp_free(tmp2); |
6104 |
tcg_temp_free_ptr(fpstatus); |
|
6055 | 6105 |
break; |
6106 |
} |
|
6056 | 6107 |
case NEON_2RM_VABS_F: |
6057 | 6108 |
gen_vfp_abs(0); |
6058 | 6109 |
break; |
Also available in: Unified diff