root / target-arm / neon_helper.c @ 36802b6b
History | View | Annotate | Download (52.1 kB)
1 | e677137d | pbrook | /*
|
---|---|---|---|
2 | e677137d | pbrook | * ARM NEON vector operations.
|
3 | e677137d | pbrook | *
|
4 | e677137d | pbrook | * Copyright (c) 2007, 2008 CodeSourcery.
|
5 | e677137d | pbrook | * Written by Paul Brook
|
6 | e677137d | pbrook | *
|
7 | e677137d | pbrook | * This code is licenced under the GNU GPL v2.
|
8 | e677137d | pbrook | */
|
9 | ad69471c | pbrook | #include <stdlib.h> |
10 | ad69471c | pbrook | #include <stdio.h> |
11 | ad69471c | pbrook | |
12 | ad69471c | pbrook | #include "cpu.h" |
13 | 2a3f75b4 | Peter Maydell | #include "exec.h" |
14 | 7b59220e | Lluís | #include "helper.h" |
15 | ad69471c | pbrook | |
/* Top bit of a 32-bit and of a 64-bit value respectively. */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/* Flag that a saturating operation clipped its result by OR-ing CPSR_Q
 * into the FPSCR slot of the VFP register file.  The flag must be OR-ed
 * in: a plain assignment would throw away every other bit currently
 * held in that register each time a helper saturates.
 */
#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q

/* Pointer to env's "standard" floating point status. */
#define NFS (&env->vfp.standard_fp_status)
|
22 | ad69471c | pbrook | |
/* Lane container types.  Each neon_<name> structure holds the lanes
 * (v1, v2, ...) of one 32-bit word.  When HOST_WORDS_BIGENDIAN is
 * defined the members are declared in reverse order, so that v1 is the
 * last member instead of the first.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define NEON_LANES2(type) type v2; type v1;
#define NEON_LANES4(type) type v4; type v3; type v2; type v1;
#else
#define NEON_LANES2(type) type v1; type v2;
#define NEON_LANES4(type) type v1; type v2; type v3; type v4;
#endif

#define NEON_TYPE1(name, type) \
typedef struct { type v1; } neon_##name;
#define NEON_TYPE2(name, type) \
typedef struct { NEON_LANES2(type) } neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct { NEON_LANES4(type) } neon_##name;

NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
#undef NEON_LANES4
#undef NEON_LANES2
|
69 | ad69471c | pbrook | |
/* Reinterpret the 32-bit word 'val' as a lane structure of type 'vtype'
 * and store it in 'dest'.  The conversion goes through a union, so the
 * bytes are copied unchanged.
 */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.word = (val); \
    dest = pun.lanes; \
} while (0)

/* Inverse of NEON_UNPACK: reinterpret the lane structure 'val' of type
 * 'vtype' as a 32-bit word and store it in 'dest'.
 */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.lanes = (val); \
    dest = pun.word; \
} while (0)
89 | ad69471c | pbrook | |
/* Apply the currently defined NEON_FN to each pair of corresponding
 * lanes of vsrc1 and vsrc2, writing the same lane of vdest.  Each wider
 * variant reuses the narrower one and adds the remaining lanes.
 */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
100 | ad69471c | pbrook | |
/* Function body shared by the two-operand element-wise helpers: unpack
 * arg1 and arg2 into lane structures of type 'vtype', run NEON_FN over
 * 'n' lanes, and return the lanes of vdest packed into one 32-bit word.
 */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}

/* Define helper neon_<name> taking two packed 32-bit operands, using
 * whichever NEON_FN is in effect at the point of expansion.
 */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
117 | ad69471c | pbrook | |
/* Pairwise operations.  */
/* For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same.  */

/* Combine adjacent lanes: the low half of vdest receives the pairs
 * taken from vsrc1, the high half the pairs taken from vsrc2.
 */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
/* No line continuation after the last statement: a trailing backslash
 * would splice whatever line follows into the macro body.
 */
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);

/* Define helper neon_<name> applying the current NEON_FN pairwise to
 * two packed 32-bit operands and returning the packed result.
 */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    uint32_t res; \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
143 | ad69471c | pbrook | |
/* Unary operators.  */
/* Define helper neon_<name> taking one packed 32-bit operand.  Only
 * vsrc1 and vdest exist here, so the NEON_FN in effect must not use its
 * third argument.
 */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1, vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
155 | ad69471c | pbrook | |
156 | ad69471c | pbrook | |
157 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
158 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
159 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
160 | ad69471c | pbrook | SET_QC(); \ |
161 | ad69471c | pbrook | dest = ~0; \
|
162 | ad69471c | pbrook | } else { \
|
163 | ad69471c | pbrook | dest = tmp; \ |
164 | ad69471c | pbrook | }} while(0) |
165 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
166 | 2a3f75b4 | Peter Maydell | NEON_VOP(qadd_u8, neon_u8, 4)
|
167 | ad69471c | pbrook | #undef NEON_FN
|
168 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
169 | 2a3f75b4 | Peter Maydell | NEON_VOP(qadd_u16, neon_u16, 2)
|
170 | ad69471c | pbrook | #undef NEON_FN
|
171 | ad69471c | pbrook | #undef NEON_USAT
|
172 | ad69471c | pbrook | |
173 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qadd_u32)(uint32_t a, uint32_t b) |
174 | 72902672 | Christophe Lyon | { |
175 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
176 | 72902672 | Christophe Lyon | if (res < a) {
|
177 | 72902672 | Christophe Lyon | SET_QC(); |
178 | 72902672 | Christophe Lyon | res = ~0;
|
179 | 72902672 | Christophe Lyon | } |
180 | 72902672 | Christophe Lyon | return res;
|
181 | 72902672 | Christophe Lyon | } |
182 | 72902672 | Christophe Lyon | |
183 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qadd_u64)(uint64_t src1, uint64_t src2) |
184 | 72902672 | Christophe Lyon | { |
185 | 72902672 | Christophe Lyon | uint64_t res; |
186 | 72902672 | Christophe Lyon | |
187 | 72902672 | Christophe Lyon | res = src1 + src2; |
188 | 72902672 | Christophe Lyon | if (res < src1) {
|
189 | 72902672 | Christophe Lyon | SET_QC(); |
190 | 72902672 | Christophe Lyon | res = ~(uint64_t)0;
|
191 | 72902672 | Christophe Lyon | } |
192 | 72902672 | Christophe Lyon | return res;
|
193 | 72902672 | Christophe Lyon | } |
194 | 72902672 | Christophe Lyon | |
195 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
196 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
197 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
198 | ad69471c | pbrook | SET_QC(); \ |
199 | ad69471c | pbrook | if (src2 > 0) { \ |
200 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
201 | ad69471c | pbrook | } else { \
|
202 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
203 | ad69471c | pbrook | } \ |
204 | ad69471c | pbrook | } \ |
205 | ad69471c | pbrook | dest = tmp; \ |
206 | ad69471c | pbrook | } while(0) |
207 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
208 | 2a3f75b4 | Peter Maydell | NEON_VOP(qadd_s8, neon_s8, 4)
|
209 | ad69471c | pbrook | #undef NEON_FN
|
210 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
211 | 2a3f75b4 | Peter Maydell | NEON_VOP(qadd_s16, neon_s16, 2)
|
212 | ad69471c | pbrook | #undef NEON_FN
|
213 | ad69471c | pbrook | #undef NEON_SSAT
|
214 | ad69471c | pbrook | |
215 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qadd_s32)(uint32_t a, uint32_t b) |
216 | 72902672 | Christophe Lyon | { |
217 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
218 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
|
219 | 72902672 | Christophe Lyon | SET_QC(); |
220 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
221 | 72902672 | Christophe Lyon | } |
222 | 72902672 | Christophe Lyon | return res;
|
223 | 72902672 | Christophe Lyon | } |
224 | 72902672 | Christophe Lyon | |
225 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qadd_s64)(uint64_t src1, uint64_t src2) |
226 | 72902672 | Christophe Lyon | { |
227 | 72902672 | Christophe Lyon | uint64_t res; |
228 | 72902672 | Christophe Lyon | |
229 | 72902672 | Christophe Lyon | res = src1 + src2; |
230 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
|
231 | 72902672 | Christophe Lyon | SET_QC(); |
232 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
233 | 72902672 | Christophe Lyon | } |
234 | 72902672 | Christophe Lyon | return res;
|
235 | 72902672 | Christophe Lyon | } |
236 | 72902672 | Christophe Lyon | |
237 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
238 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
239 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
240 | ad69471c | pbrook | SET_QC(); \ |
241 | ad69471c | pbrook | dest = 0; \
|
242 | ad69471c | pbrook | } else { \
|
243 | ad69471c | pbrook | dest = tmp; \ |
244 | ad69471c | pbrook | }} while(0) |
245 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
246 | 2a3f75b4 | Peter Maydell | NEON_VOP(qsub_u8, neon_u8, 4)
|
247 | ad69471c | pbrook | #undef NEON_FN
|
248 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
249 | 2a3f75b4 | Peter Maydell | NEON_VOP(qsub_u16, neon_u16, 2)
|
250 | ad69471c | pbrook | #undef NEON_FN
|
251 | ad69471c | pbrook | #undef NEON_USAT
|
252 | ad69471c | pbrook | |
253 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qsub_u32)(uint32_t a, uint32_t b) |
254 | 72902672 | Christophe Lyon | { |
255 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
256 | 72902672 | Christophe Lyon | if (res > a) {
|
257 | 72902672 | Christophe Lyon | SET_QC(); |
258 | 72902672 | Christophe Lyon | res = 0;
|
259 | 72902672 | Christophe Lyon | } |
260 | 72902672 | Christophe Lyon | return res;
|
261 | 72902672 | Christophe Lyon | } |
262 | 72902672 | Christophe Lyon | |
263 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qsub_u64)(uint64_t src1, uint64_t src2) |
264 | 72902672 | Christophe Lyon | { |
265 | 72902672 | Christophe Lyon | uint64_t res; |
266 | 72902672 | Christophe Lyon | |
267 | 72902672 | Christophe Lyon | if (src1 < src2) {
|
268 | 72902672 | Christophe Lyon | SET_QC(); |
269 | 72902672 | Christophe Lyon | res = 0;
|
270 | 72902672 | Christophe Lyon | } else {
|
271 | 72902672 | Christophe Lyon | res = src1 - src2; |
272 | 72902672 | Christophe Lyon | } |
273 | 72902672 | Christophe Lyon | return res;
|
274 | 72902672 | Christophe Lyon | } |
275 | 72902672 | Christophe Lyon | |
276 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
277 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
278 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
279 | ad69471c | pbrook | SET_QC(); \ |
280 | ad69471c | pbrook | if (src2 < 0) { \ |
281 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
282 | ad69471c | pbrook | } else { \
|
283 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
284 | ad69471c | pbrook | } \ |
285 | ad69471c | pbrook | } \ |
286 | ad69471c | pbrook | dest = tmp; \ |
287 | ad69471c | pbrook | } while(0) |
288 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
289 | 2a3f75b4 | Peter Maydell | NEON_VOP(qsub_s8, neon_s8, 4)
|
290 | ad69471c | pbrook | #undef NEON_FN
|
291 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
292 | 2a3f75b4 | Peter Maydell | NEON_VOP(qsub_s16, neon_s16, 2)
|
293 | ad69471c | pbrook | #undef NEON_FN
|
294 | ad69471c | pbrook | #undef NEON_SSAT
|
295 | ad69471c | pbrook | |
296 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qsub_s32)(uint32_t a, uint32_t b) |
297 | 72902672 | Christophe Lyon | { |
298 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
299 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
|
300 | 72902672 | Christophe Lyon | SET_QC(); |
301 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
302 | 72902672 | Christophe Lyon | } |
303 | 72902672 | Christophe Lyon | return res;
|
304 | 72902672 | Christophe Lyon | } |
305 | 72902672 | Christophe Lyon | |
306 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qsub_s64)(uint64_t src1, uint64_t src2) |
307 | 72902672 | Christophe Lyon | { |
308 | 72902672 | Christophe Lyon | uint64_t res; |
309 | 72902672 | Christophe Lyon | |
310 | 72902672 | Christophe Lyon | res = src1 - src2; |
311 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
|
312 | 72902672 | Christophe Lyon | SET_QC(); |
313 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
314 | 72902672 | Christophe Lyon | } |
315 | 72902672 | Christophe Lyon | return res;
|
316 | 72902672 | Christophe Lyon | } |
317 | 72902672 | Christophe Lyon | |
318 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 |
319 | ad69471c | pbrook | NEON_VOP(hadd_s8, neon_s8, 4)
|
320 | ad69471c | pbrook | NEON_VOP(hadd_u8, neon_u8, 4)
|
321 | ad69471c | pbrook | NEON_VOP(hadd_s16, neon_s16, 2)
|
322 | ad69471c | pbrook | NEON_VOP(hadd_u16, neon_u16, 2)
|
323 | ad69471c | pbrook | #undef NEON_FN
|
324 | ad69471c | pbrook | |
325 | ad69471c | pbrook | int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) |
326 | ad69471c | pbrook | { |
327 | ad69471c | pbrook | int32_t dest; |
328 | ad69471c | pbrook | |
329 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
330 | ad69471c | pbrook | if (src1 & src2 & 1) |
331 | ad69471c | pbrook | dest++; |
332 | ad69471c | pbrook | return dest;
|
333 | ad69471c | pbrook | } |
334 | ad69471c | pbrook | |
335 | ad69471c | pbrook | uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) |
336 | ad69471c | pbrook | { |
337 | ad69471c | pbrook | uint32_t dest; |
338 | ad69471c | pbrook | |
339 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
340 | ad69471c | pbrook | if (src1 & src2 & 1) |
341 | ad69471c | pbrook | dest++; |
342 | ad69471c | pbrook | return dest;
|
343 | ad69471c | pbrook | } |
344 | ad69471c | pbrook | |
345 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 |
346 | ad69471c | pbrook | NEON_VOP(rhadd_s8, neon_s8, 4)
|
347 | ad69471c | pbrook | NEON_VOP(rhadd_u8, neon_u8, 4)
|
348 | ad69471c | pbrook | NEON_VOP(rhadd_s16, neon_s16, 2)
|
349 | ad69471c | pbrook | NEON_VOP(rhadd_u16, neon_u16, 2)
|
350 | ad69471c | pbrook | #undef NEON_FN
|
351 | ad69471c | pbrook | |
352 | ad69471c | pbrook | int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) |
353 | ad69471c | pbrook | { |
354 | ad69471c | pbrook | int32_t dest; |
355 | ad69471c | pbrook | |
356 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
357 | ad69471c | pbrook | if ((src1 | src2) & 1) |
358 | ad69471c | pbrook | dest++; |
359 | ad69471c | pbrook | return dest;
|
360 | ad69471c | pbrook | } |
361 | ad69471c | pbrook | |
362 | ad69471c | pbrook | uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) |
363 | ad69471c | pbrook | { |
364 | ad69471c | pbrook | uint32_t dest; |
365 | ad69471c | pbrook | |
366 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
367 | ad69471c | pbrook | if ((src1 | src2) & 1) |
368 | ad69471c | pbrook | dest++; |
369 | ad69471c | pbrook | return dest;
|
370 | ad69471c | pbrook | } |
371 | ad69471c | pbrook | |
372 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 |
373 | ad69471c | pbrook | NEON_VOP(hsub_s8, neon_s8, 4)
|
374 | ad69471c | pbrook | NEON_VOP(hsub_u8, neon_u8, 4)
|
375 | ad69471c | pbrook | NEON_VOP(hsub_s16, neon_s16, 2)
|
376 | ad69471c | pbrook | NEON_VOP(hsub_u16, neon_u16, 2)
|
377 | ad69471c | pbrook | #undef NEON_FN
|
378 | ad69471c | pbrook | |
379 | ad69471c | pbrook | int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) |
380 | ad69471c | pbrook | { |
381 | ad69471c | pbrook | int32_t dest; |
382 | ad69471c | pbrook | |
383 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
384 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
385 | ad69471c | pbrook | dest--; |
386 | ad69471c | pbrook | return dest;
|
387 | ad69471c | pbrook | } |
388 | ad69471c | pbrook | |
389 | ad69471c | pbrook | uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) |
390 | ad69471c | pbrook | { |
391 | ad69471c | pbrook | uint32_t dest; |
392 | ad69471c | pbrook | |
393 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
394 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
395 | ad69471c | pbrook | dest--; |
396 | ad69471c | pbrook | return dest;
|
397 | ad69471c | pbrook | } |
398 | ad69471c | pbrook | |
399 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 |
400 | ad69471c | pbrook | NEON_VOP(cgt_s8, neon_s8, 4)
|
401 | ad69471c | pbrook | NEON_VOP(cgt_u8, neon_u8, 4)
|
402 | ad69471c | pbrook | NEON_VOP(cgt_s16, neon_s16, 2)
|
403 | ad69471c | pbrook | NEON_VOP(cgt_u16, neon_u16, 2)
|
404 | ad69471c | pbrook | NEON_VOP(cgt_s32, neon_s32, 1)
|
405 | ad69471c | pbrook | NEON_VOP(cgt_u32, neon_u32, 1)
|
406 | ad69471c | pbrook | #undef NEON_FN
|
407 | ad69471c | pbrook | |
408 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 |
409 | ad69471c | pbrook | NEON_VOP(cge_s8, neon_s8, 4)
|
410 | ad69471c | pbrook | NEON_VOP(cge_u8, neon_u8, 4)
|
411 | ad69471c | pbrook | NEON_VOP(cge_s16, neon_s16, 2)
|
412 | ad69471c | pbrook | NEON_VOP(cge_u16, neon_u16, 2)
|
413 | ad69471c | pbrook | NEON_VOP(cge_s32, neon_s32, 1)
|
414 | ad69471c | pbrook | NEON_VOP(cge_u32, neon_u32, 1)
|
415 | ad69471c | pbrook | #undef NEON_FN
|
416 | ad69471c | pbrook | |
417 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
|
418 | ad69471c | pbrook | NEON_VOP(min_s8, neon_s8, 4)
|
419 | ad69471c | pbrook | NEON_VOP(min_u8, neon_u8, 4)
|
420 | ad69471c | pbrook | NEON_VOP(min_s16, neon_s16, 2)
|
421 | ad69471c | pbrook | NEON_VOP(min_u16, neon_u16, 2)
|
422 | ad69471c | pbrook | NEON_VOP(min_s32, neon_s32, 1)
|
423 | ad69471c | pbrook | NEON_VOP(min_u32, neon_u32, 1)
|
424 | ad69471c | pbrook | NEON_POP(pmin_s8, neon_s8, 4)
|
425 | ad69471c | pbrook | NEON_POP(pmin_u8, neon_u8, 4)
|
426 | ad69471c | pbrook | NEON_POP(pmin_s16, neon_s16, 2)
|
427 | ad69471c | pbrook | NEON_POP(pmin_u16, neon_u16, 2)
|
428 | ad69471c | pbrook | #undef NEON_FN
|
429 | ad69471c | pbrook | |
430 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
|
431 | ad69471c | pbrook | NEON_VOP(max_s8, neon_s8, 4)
|
432 | ad69471c | pbrook | NEON_VOP(max_u8, neon_u8, 4)
|
433 | ad69471c | pbrook | NEON_VOP(max_s16, neon_s16, 2)
|
434 | ad69471c | pbrook | NEON_VOP(max_u16, neon_u16, 2)
|
435 | ad69471c | pbrook | NEON_VOP(max_s32, neon_s32, 1)
|
436 | ad69471c | pbrook | NEON_VOP(max_u32, neon_u32, 1)
|
437 | ad69471c | pbrook | NEON_POP(pmax_s8, neon_s8, 4)
|
438 | ad69471c | pbrook | NEON_POP(pmax_u8, neon_u8, 4)
|
439 | ad69471c | pbrook | NEON_POP(pmax_s16, neon_s16, 2)
|
440 | ad69471c | pbrook | NEON_POP(pmax_u16, neon_u16, 2)
|
441 | ad69471c | pbrook | #undef NEON_FN
|
442 | ad69471c | pbrook | |
443 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) \
|
444 | ad69471c | pbrook | dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) |
445 | ad69471c | pbrook | NEON_VOP(abd_s8, neon_s8, 4)
|
446 | ad69471c | pbrook | NEON_VOP(abd_u8, neon_u8, 4)
|
447 | ad69471c | pbrook | NEON_VOP(abd_s16, neon_s16, 2)
|
448 | ad69471c | pbrook | NEON_VOP(abd_u16, neon_u16, 2)
|
449 | ad69471c | pbrook | NEON_VOP(abd_s32, neon_s32, 1)
|
450 | ad69471c | pbrook | NEON_VOP(abd_u32, neon_u32, 1)
|
451 | ad69471c | pbrook | #undef NEON_FN
|
452 | ad69471c | pbrook | |
453 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
454 | ad69471c | pbrook | int8_t tmp; \ |
455 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
456 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
457 | 50f67e95 | Juha Riihimäki | tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
458 | ad69471c | pbrook | dest = 0; \
|
459 | ad69471c | pbrook | } else if (tmp < 0) { \ |
460 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
461 | ad69471c | pbrook | } else { \
|
462 | ad69471c | pbrook | dest = src1 << tmp; \ |
463 | ad69471c | pbrook | }} while (0) |
464 | ad69471c | pbrook | NEON_VOP(shl_u8, neon_u8, 4)
|
465 | ad69471c | pbrook | NEON_VOP(shl_u16, neon_u16, 2)
|
466 | ad69471c | pbrook | NEON_VOP(shl_u32, neon_u32, 1)
|
467 | ad69471c | pbrook | #undef NEON_FN
|
468 | ad69471c | pbrook | |
469 | ad69471c | pbrook | uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) |
470 | ad69471c | pbrook | { |
471 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
472 | ad69471c | pbrook | if (shift >= 64 || shift <= -64) { |
473 | ad69471c | pbrook | val = 0;
|
474 | ad69471c | pbrook | } else if (shift < 0) { |
475 | ad69471c | pbrook | val >>= -shift; |
476 | ad69471c | pbrook | } else {
|
477 | ad69471c | pbrook | val <<= shift; |
478 | ad69471c | pbrook | } |
479 | ad69471c | pbrook | return val;
|
480 | ad69471c | pbrook | } |
481 | ad69471c | pbrook | |
482 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
483 | ad69471c | pbrook | int8_t tmp; \ |
484 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
485 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
486 | ad69471c | pbrook | dest = 0; \
|
487 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
488 | ad69471c | pbrook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
489 | ad69471c | pbrook | } else if (tmp < 0) { \ |
490 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
491 | ad69471c | pbrook | } else { \
|
492 | ad69471c | pbrook | dest = src1 << tmp; \ |
493 | ad69471c | pbrook | }} while (0) |
494 | ad69471c | pbrook | NEON_VOP(shl_s8, neon_s8, 4)
|
495 | ad69471c | pbrook | NEON_VOP(shl_s16, neon_s16, 2)
|
496 | ad69471c | pbrook | NEON_VOP(shl_s32, neon_s32, 1)
|
497 | ad69471c | pbrook | #undef NEON_FN
|
498 | ad69471c | pbrook | |
499 | ad69471c | pbrook | uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) |
500 | ad69471c | pbrook | { |
501 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
502 | ad69471c | pbrook | int64_t val = valop; |
503 | ad69471c | pbrook | if (shift >= 64) { |
504 | ad69471c | pbrook | val = 0;
|
505 | ad69471c | pbrook | } else if (shift <= -64) { |
506 | ad69471c | pbrook | val >>= 63;
|
507 | ad69471c | pbrook | } else if (shift < 0) { |
508 | ad69471c | pbrook | val >>= -shift; |
509 | ad69471c | pbrook | } else {
|
510 | ad69471c | pbrook | val <<= shift; |
511 | ad69471c | pbrook | } |
512 | ad69471c | pbrook | return val;
|
513 | ad69471c | pbrook | } |
514 | ad69471c | pbrook | |
515 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
516 | ad69471c | pbrook | int8_t tmp; \ |
517 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
518 | 0670a7b6 | Peter Maydell | if ((tmp >= (ssize_t)sizeof(src1) * 8) \ |
519 | 0670a7b6 | Peter Maydell | || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ |
520 | ad69471c | pbrook | dest = 0; \
|
521 | ad69471c | pbrook | } else if (tmp < 0) { \ |
522 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
523 | ad69471c | pbrook | } else { \
|
524 | ad69471c | pbrook | dest = src1 << tmp; \ |
525 | ad69471c | pbrook | }} while (0) |
526 | ad69471c | pbrook | NEON_VOP(rshl_s8, neon_s8, 4)
|
527 | ad69471c | pbrook | NEON_VOP(rshl_s16, neon_s16, 2)
|
528 | ad69471c | pbrook | #undef NEON_FN
|
529 | ad69471c | pbrook | |
530 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
531 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
532 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) |
533 | 4bd4ee07 | Christophe Lyon | { |
534 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
535 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
536 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
537 | 4bd4ee07 | Christophe Lyon | if ((shift >= 32) || (shift <= -32)) { |
538 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
539 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
540 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
541 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
542 | 4bd4ee07 | Christophe Lyon | } else {
|
543 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
544 | 4bd4ee07 | Christophe Lyon | } |
545 | 4bd4ee07 | Christophe Lyon | return dest;
|
546 | 4bd4ee07 | Christophe Lyon | } |
547 | 4bd4ee07 | Christophe Lyon | |
548 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
549 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
550 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
551 | ad69471c | pbrook | { |
552 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
553 | ad69471c | pbrook | int64_t val = valop; |
554 | 0670a7b6 | Peter Maydell | if ((shift >= 64) || (shift <= -64)) { |
555 | ad69471c | pbrook | val = 0;
|
556 | ad69471c | pbrook | } else if (shift < 0) { |
557 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
558 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
559 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
560 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
561 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
562 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000LL;
|
563 | 4bd4ee07 | Christophe Lyon | } else {
|
564 | 4bd4ee07 | Christophe Lyon | val++; |
565 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
566 | 4bd4ee07 | Christophe Lyon | } |
567 | ad69471c | pbrook | } else {
|
568 | ad69471c | pbrook | val <<= shift; |
569 | ad69471c | pbrook | } |
570 | ad69471c | pbrook | return val;
|
571 | ad69471c | pbrook | } |
572 | ad69471c | pbrook | |
573 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
574 | ad69471c | pbrook | int8_t tmp; \ |
575 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
576 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
577 | 50f67e95 | Juha Riihimäki | tmp < -(ssize_t)sizeof(src1) * 8) { \ |
578 | ad69471c | pbrook | dest = 0; \
|
579 | 50f67e95 | Juha Riihimäki | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
580 | b6c63b98 | Christophe Lyon | dest = src1 >> (-tmp - 1); \
|
581 | ad69471c | pbrook | } else if (tmp < 0) { \ |
582 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
583 | ad69471c | pbrook | } else { \
|
584 | ad69471c | pbrook | dest = src1 << tmp; \ |
585 | ad69471c | pbrook | }} while (0) |
586 | ad69471c | pbrook | NEON_VOP(rshl_u8, neon_u8, 4)
|
587 | ad69471c | pbrook | NEON_VOP(rshl_u16, neon_u16, 2)
|
588 | ad69471c | pbrook | #undef NEON_FN
|
589 | ad69471c | pbrook | |
590 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
591 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
592 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) |
593 | 4bd4ee07 | Christophe Lyon | { |
594 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
595 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
596 | 4bd4ee07 | Christophe Lyon | if (shift >= 32 || shift < -32) { |
597 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
598 | 4bd4ee07 | Christophe Lyon | } else if (shift == -32) { |
599 | 4bd4ee07 | Christophe Lyon | dest = val >> 31;
|
600 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
601 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
602 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
603 | 4bd4ee07 | Christophe Lyon | } else {
|
604 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
605 | 4bd4ee07 | Christophe Lyon | } |
606 | 4bd4ee07 | Christophe Lyon | return dest;
|
607 | 4bd4ee07 | Christophe Lyon | } |
608 | 4bd4ee07 | Christophe Lyon | |
609 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
610 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
611 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
612 | ad69471c | pbrook | { |
613 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
614 | 51e3930f | Christophe Lyon | if (shift >= 64 || shift < -64) { |
615 | ad69471c | pbrook | val = 0;
|
616 | ad69471c | pbrook | } else if (shift == -64) { |
617 | ad69471c | pbrook | /* Rounding a 1-bit result just preserves that bit. */
|
618 | ad69471c | pbrook | val >>= 63;
|
619 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
620 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
621 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
622 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
623 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
624 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
625 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
626 | 4bd4ee07 | Christophe Lyon | } else {
|
627 | 4bd4ee07 | Christophe Lyon | val++; |
628 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
629 | 4bd4ee07 | Christophe Lyon | } |
630 | ad69471c | pbrook | } else {
|
631 | ad69471c | pbrook | val <<= shift; |
632 | ad69471c | pbrook | } |
633 | ad69471c | pbrook | return val;
|
634 | ad69471c | pbrook | } |
635 | ad69471c | pbrook | |
636 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
637 | ad69471c | pbrook | int8_t tmp; \ |
638 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
639 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
640 | ad69471c | pbrook | if (src1) { \
|
641 | ad69471c | pbrook | SET_QC(); \ |
642 | ad69471c | pbrook | dest = ~0; \
|
643 | ad69471c | pbrook | } else { \
|
644 | ad69471c | pbrook | dest = 0; \
|
645 | ad69471c | pbrook | } \ |
646 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
647 | ad69471c | pbrook | dest = 0; \
|
648 | ad69471c | pbrook | } else if (tmp < 0) { \ |
649 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
650 | ad69471c | pbrook | } else { \
|
651 | ad69471c | pbrook | dest = src1 << tmp; \ |
652 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
653 | ad69471c | pbrook | SET_QC(); \ |
654 | ad69471c | pbrook | dest = ~0; \
|
655 | ad69471c | pbrook | } \ |
656 | ad69471c | pbrook | }} while (0) |
657 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_u8, neon_u8, 4)
|
658 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_u16, neon_u16, 2)
|
659 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_u32, neon_u32, 1)
|
660 | ad69471c | pbrook | #undef NEON_FN
|
661 | ad69471c | pbrook | |
662 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qshl_u64)(uint64_t val, uint64_t shiftop) |
663 | ad69471c | pbrook | { |
664 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
665 | ad69471c | pbrook | if (shift >= 64) { |
666 | ad69471c | pbrook | if (val) {
|
667 | ad69471c | pbrook | val = ~(uint64_t)0;
|
668 | ad69471c | pbrook | SET_QC(); |
669 | ad69471c | pbrook | } |
670 | ad69471c | pbrook | } else if (shift <= -64) { |
671 | ad69471c | pbrook | val = 0;
|
672 | ad69471c | pbrook | } else if (shift < 0) { |
673 | ad69471c | pbrook | val >>= -shift; |
674 | ad69471c | pbrook | } else {
|
675 | ad69471c | pbrook | uint64_t tmp = val; |
676 | ad69471c | pbrook | val <<= shift; |
677 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
678 | ad69471c | pbrook | SET_QC(); |
679 | ad69471c | pbrook | val = ~(uint64_t)0;
|
680 | ad69471c | pbrook | } |
681 | ad69471c | pbrook | } |
682 | ad69471c | pbrook | return val;
|
683 | ad69471c | pbrook | } |
684 | ad69471c | pbrook | |
685 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
686 | ad69471c | pbrook | int8_t tmp; \ |
687 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
688 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
689 | a5d88f3e | Peter Maydell | if (src1) { \
|
690 | ad69471c | pbrook | SET_QC(); \ |
691 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
692 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
693 | a5d88f3e | Peter Maydell | dest--; \ |
694 | a5d88f3e | Peter Maydell | } \ |
695 | a5d88f3e | Peter Maydell | } else { \
|
696 | a5d88f3e | Peter Maydell | dest = src1; \ |
697 | a5d88f3e | Peter Maydell | } \ |
698 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
699 | ad69471c | pbrook | dest = src1 >> 31; \
|
700 | ad69471c | pbrook | } else if (tmp < 0) { \ |
701 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
702 | ad69471c | pbrook | } else { \
|
703 | ad69471c | pbrook | dest = src1 << tmp; \ |
704 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
705 | ad69471c | pbrook | SET_QC(); \ |
706 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
707 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
708 | a5d88f3e | Peter Maydell | dest--; \ |
709 | a5d88f3e | Peter Maydell | } \ |
710 | ad69471c | pbrook | } \ |
711 | ad69471c | pbrook | }} while (0) |
712 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_s8, neon_s8, 4)
|
713 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_s16, neon_s16, 2)
|
714 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshl_s32, neon_s32, 1)
|
715 | ad69471c | pbrook | #undef NEON_FN
|
716 | ad69471c | pbrook | |
717 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qshl_s64)(uint64_t valop, uint64_t shiftop) |
718 | ad69471c | pbrook | { |
719 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
720 | ad69471c | pbrook | int64_t val = valop; |
721 | ad69471c | pbrook | if (shift >= 64) { |
722 | ad69471c | pbrook | if (val) {
|
723 | ad69471c | pbrook | SET_QC(); |
724 | eb7a3d79 | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
725 | ad69471c | pbrook | } |
726 | 4c9b70ae | Juha Riihimäki | } else if (shift <= -64) { |
727 | ad69471c | pbrook | val >>= 63;
|
728 | ad69471c | pbrook | } else if (shift < 0) { |
729 | ad69471c | pbrook | val >>= -shift; |
730 | ad69471c | pbrook | } else {
|
731 | ad69471c | pbrook | int64_t tmp = val; |
732 | ad69471c | pbrook | val <<= shift; |
733 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
734 | ad69471c | pbrook | SET_QC(); |
735 | ad69471c | pbrook | val = (tmp >> 63) ^ ~SIGNBIT64;
|
736 | ad69471c | pbrook | } |
737 | ad69471c | pbrook | } |
738 | ad69471c | pbrook | return val;
|
739 | ad69471c | pbrook | } |
740 | ad69471c | pbrook | |
741 | 4ca4502c | Juha Riihimäki | #define NEON_FN(dest, src1, src2) do { \ |
742 | 4ca4502c | Juha Riihimäki | if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ |
743 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
744 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
745 | 4ca4502c | Juha Riihimäki | } else { \
|
746 | 4ca4502c | Juha Riihimäki | int8_t tmp; \ |
747 | 4ca4502c | Juha Riihimäki | tmp = (int8_t)src2; \ |
748 | 4ca4502c | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
749 | 4ca4502c | Juha Riihimäki | if (src1) { \
|
750 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
751 | 4ca4502c | Juha Riihimäki | dest = ~0; \
|
752 | 4ca4502c | Juha Riihimäki | } else { \
|
753 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
754 | 4ca4502c | Juha Riihimäki | } \ |
755 | 4ca4502c | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
756 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
757 | 4ca4502c | Juha Riihimäki | } else if (tmp < 0) { \ |
758 | 4ca4502c | Juha Riihimäki | dest = src1 >> -tmp; \ |
759 | 4ca4502c | Juha Riihimäki | } else { \
|
760 | 4ca4502c | Juha Riihimäki | dest = src1 << tmp; \ |
761 | 4ca4502c | Juha Riihimäki | if ((dest >> tmp) != src1) { \
|
762 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
763 | 4ca4502c | Juha Riihimäki | dest = ~0; \
|
764 | 4ca4502c | Juha Riihimäki | } \ |
765 | 4ca4502c | Juha Riihimäki | } \ |
766 | 4ca4502c | Juha Riihimäki | }} while (0) |
767 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshlu_s8, neon_u8, 4)
|
768 | 2a3f75b4 | Peter Maydell | NEON_VOP(qshlu_s16, neon_u16, 2)
|
769 | 4ca4502c | Juha Riihimäki | #undef NEON_FN
|
770 | 4ca4502c | Juha Riihimäki | |
771 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qshlu_s32)(uint32_t valop, uint32_t shiftop) |
772 | 4ca4502c | Juha Riihimäki | { |
773 | 4ca4502c | Juha Riihimäki | if ((int32_t)valop < 0) { |
774 | 4ca4502c | Juha Riihimäki | SET_QC(); |
775 | 4ca4502c | Juha Riihimäki | return 0; |
776 | 4ca4502c | Juha Riihimäki | } |
777 | 2a3f75b4 | Peter Maydell | return helper_neon_qshl_u32(valop, shiftop);
|
778 | 4ca4502c | Juha Riihimäki | } |
779 | 4ca4502c | Juha Riihimäki | |
780 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qshlu_s64)(uint64_t valop, uint64_t shiftop) |
781 | 4ca4502c | Juha Riihimäki | { |
782 | 4ca4502c | Juha Riihimäki | if ((int64_t)valop < 0) { |
783 | 4ca4502c | Juha Riihimäki | SET_QC(); |
784 | 4ca4502c | Juha Riihimäki | return 0; |
785 | 4ca4502c | Juha Riihimäki | } |
786 | 2a3f75b4 | Peter Maydell | return helper_neon_qshl_u64(valop, shiftop);
|
787 | 4ca4502c | Juha Riihimäki | } |
788 | ad69471c | pbrook | |
789 | ad69471c | pbrook | /* FIXME: This is wrong. */
|
790 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
791 | ad69471c | pbrook | int8_t tmp; \ |
792 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
793 | 33ebc293 | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
794 | 33ebc293 | Peter Maydell | if (src1) { \
|
795 | 33ebc293 | Peter Maydell | SET_QC(); \ |
796 | 33ebc293 | Peter Maydell | dest = ~0; \
|
797 | 33ebc293 | Peter Maydell | } else { \
|
798 | 33ebc293 | Peter Maydell | dest = 0; \
|
799 | 33ebc293 | Peter Maydell | } \ |
800 | 33ebc293 | Peter Maydell | } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ |
801 | 33ebc293 | Peter Maydell | dest = 0; \
|
802 | 33ebc293 | Peter Maydell | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
803 | 33ebc293 | Peter Maydell | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
804 | 33ebc293 | Peter Maydell | } else if (tmp < 0) { \ |
805 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
806 | ad69471c | pbrook | } else { \
|
807 | ad69471c | pbrook | dest = src1 << tmp; \ |
808 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
809 | ad69471c | pbrook | SET_QC(); \ |
810 | ad69471c | pbrook | dest = ~0; \
|
811 | ad69471c | pbrook | } \ |
812 | ad69471c | pbrook | }} while (0) |
813 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrshl_u8, neon_u8, 4)
|
814 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrshl_u16, neon_u16, 2)
|
815 | ad69471c | pbrook | #undef NEON_FN
|
816 | ad69471c | pbrook | |
817 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
818 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
819 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qrshl_u32)(uint32_t val, uint32_t shiftop) |
820 | 4bd4ee07 | Christophe Lyon | { |
821 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
822 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
823 | 33ebc293 | Peter Maydell | if (shift >= 32) { |
824 | 33ebc293 | Peter Maydell | if (val) {
|
825 | 33ebc293 | Peter Maydell | SET_QC(); |
826 | 33ebc293 | Peter Maydell | dest = ~0;
|
827 | 33ebc293 | Peter Maydell | } else {
|
828 | 33ebc293 | Peter Maydell | dest = 0;
|
829 | 33ebc293 | Peter Maydell | } |
830 | 33ebc293 | Peter Maydell | } else if (shift < -32) { |
831 | 33ebc293 | Peter Maydell | dest = 0;
|
832 | 33ebc293 | Peter Maydell | } else if (shift == -32) { |
833 | 33ebc293 | Peter Maydell | dest = val >> 31;
|
834 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
835 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
836 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
837 | 4bd4ee07 | Christophe Lyon | } else {
|
838 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
839 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
840 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
841 | 4bd4ee07 | Christophe Lyon | dest = ~0;
|
842 | 4bd4ee07 | Christophe Lyon | } |
843 | 4bd4ee07 | Christophe Lyon | } |
844 | 4bd4ee07 | Christophe Lyon | return dest;
|
845 | 4bd4ee07 | Christophe Lyon | } |
846 | 4bd4ee07 | Christophe Lyon | |
847 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
848 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
849 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qrshl_u64)(uint64_t val, uint64_t shiftop) |
850 | ad69471c | pbrook | { |
851 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
852 | 33ebc293 | Peter Maydell | if (shift >= 64) { |
853 | 33ebc293 | Peter Maydell | if (val) {
|
854 | 33ebc293 | Peter Maydell | SET_QC(); |
855 | 33ebc293 | Peter Maydell | val = ~0;
|
856 | 33ebc293 | Peter Maydell | } |
857 | 33ebc293 | Peter Maydell | } else if (shift < -64) { |
858 | 33ebc293 | Peter Maydell | val = 0;
|
859 | 33ebc293 | Peter Maydell | } else if (shift == -64) { |
860 | 33ebc293 | Peter Maydell | val >>= 63;
|
861 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
862 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
863 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
864 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
865 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
866 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
867 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
868 | 4bd4ee07 | Christophe Lyon | } else {
|
869 | 4bd4ee07 | Christophe Lyon | val++; |
870 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
871 | 4bd4ee07 | Christophe Lyon | } |
872 | ad69471c | pbrook | } else { \
|
873 | ad69471c | pbrook | uint64_t tmp = val; |
874 | ad69471c | pbrook | val <<= shift; |
875 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
876 | ad69471c | pbrook | SET_QC(); |
877 | ad69471c | pbrook | val = ~0;
|
878 | ad69471c | pbrook | } |
879 | ad69471c | pbrook | } |
880 | ad69471c | pbrook | return val;
|
881 | ad69471c | pbrook | } |
882 | ad69471c | pbrook | |
883 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
884 | ad69471c | pbrook | int8_t tmp; \ |
885 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
886 | 7b6ecf5b | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
887 | 7b6ecf5b | Peter Maydell | if (src1) { \
|
888 | 7b6ecf5b | Peter Maydell | SET_QC(); \ |
889 | 7b6ecf5b | Peter Maydell | dest = (1 << (sizeof(src1) * 8 - 1)); \ |
890 | 7b6ecf5b | Peter Maydell | if (src1 > 0) { \ |
891 | 7b6ecf5b | Peter Maydell | dest--; \ |
892 | 7b6ecf5b | Peter Maydell | } \ |
893 | 7b6ecf5b | Peter Maydell | } else { \
|
894 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
895 | 7b6ecf5b | Peter Maydell | } \ |
896 | 7b6ecf5b | Peter Maydell | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
897 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
898 | 7b6ecf5b | Peter Maydell | } else if (tmp < 0) { \ |
899 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
900 | ad69471c | pbrook | } else { \
|
901 | ad69471c | pbrook | dest = src1 << tmp; \ |
902 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
903 | ad69471c | pbrook | SET_QC(); \ |
904 | 960e623b | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
905 | 960e623b | Peter Maydell | if (src1 > 0) { \ |
906 | 960e623b | Peter Maydell | dest--; \ |
907 | 960e623b | Peter Maydell | } \ |
908 | ad69471c | pbrook | } \ |
909 | ad69471c | pbrook | }} while (0) |
910 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrshl_s8, neon_s8, 4)
|
911 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrshl_s16, neon_s16, 2)
|
912 | ad69471c | pbrook | #undef NEON_FN
|
913 | ad69471c | pbrook | |
914 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
915 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
916 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qrshl_s32)(uint32_t valop, uint32_t shiftop) |
917 | 4bd4ee07 | Christophe Lyon | { |
918 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
919 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
920 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
921 | 7b6ecf5b | Peter Maydell | if (shift >= 32) { |
922 | 7b6ecf5b | Peter Maydell | if (val) {
|
923 | 7b6ecf5b | Peter Maydell | SET_QC(); |
924 | 7b6ecf5b | Peter Maydell | dest = (val >> 31) ^ ~SIGNBIT;
|
925 | 7b6ecf5b | Peter Maydell | } else {
|
926 | 7b6ecf5b | Peter Maydell | dest = 0;
|
927 | 7b6ecf5b | Peter Maydell | } |
928 | 7b6ecf5b | Peter Maydell | } else if (shift <= -32) { |
929 | 7b6ecf5b | Peter Maydell | dest = 0;
|
930 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
931 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
932 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
933 | 4bd4ee07 | Christophe Lyon | } else {
|
934 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
935 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
936 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
937 | 4bd4ee07 | Christophe Lyon | dest = (val >> 31) ^ ~SIGNBIT;
|
938 | 4bd4ee07 | Christophe Lyon | } |
939 | 4bd4ee07 | Christophe Lyon | } |
940 | 4bd4ee07 | Christophe Lyon | return dest;
|
941 | 4bd4ee07 | Christophe Lyon | } |
942 | 4bd4ee07 | Christophe Lyon | |
943 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
944 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
945 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_qrshl_s64)(uint64_t valop, uint64_t shiftop) |
946 | ad69471c | pbrook | { |
947 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
948 | ad69471c | pbrook | int64_t val = valop; |
949 | ad69471c | pbrook | |
950 | 7b6ecf5b | Peter Maydell | if (shift >= 64) { |
951 | 7b6ecf5b | Peter Maydell | if (val) {
|
952 | 7b6ecf5b | Peter Maydell | SET_QC(); |
953 | 7b6ecf5b | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
954 | 7b6ecf5b | Peter Maydell | } |
955 | 7b6ecf5b | Peter Maydell | } else if (shift <= -64) { |
956 | 7b6ecf5b | Peter Maydell | val = 0;
|
957 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
958 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
959 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
960 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
961 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
962 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
963 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000ULL;
|
964 | 4bd4ee07 | Christophe Lyon | } else {
|
965 | 4bd4ee07 | Christophe Lyon | val++; |
966 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
967 | 4bd4ee07 | Christophe Lyon | } |
968 | ad69471c | pbrook | } else {
|
969 | 4bd4ee07 | Christophe Lyon | int64_t tmp = val; |
970 | ad69471c | pbrook | val <<= shift; |
971 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
972 | ad69471c | pbrook | SET_QC(); |
973 | 4bd4ee07 | Christophe Lyon | val = (tmp >> 63) ^ ~SIGNBIT64;
|
974 | ad69471c | pbrook | } |
975 | ad69471c | pbrook | } |
976 | ad69471c | pbrook | return val;
|
977 | ad69471c | pbrook | } |
978 | ad69471c | pbrook | |
979 | ad69471c | pbrook | uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |
980 | ad69471c | pbrook | { |
981 | ad69471c | pbrook | uint32_t mask; |
982 | ad69471c | pbrook | mask = (a ^ b) & 0x80808080u;
|
983 | ad69471c | pbrook | a &= ~0x80808080u;
|
984 | ad69471c | pbrook | b &= ~0x80808080u;
|
985 | ad69471c | pbrook | return (a + b) ^ mask;
|
986 | ad69471c | pbrook | } |
987 | ad69471c | pbrook | |
988 | ad69471c | pbrook | uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) |
989 | ad69471c | pbrook | { |
990 | ad69471c | pbrook | uint32_t mask; |
991 | ad69471c | pbrook | mask = (a ^ b) & 0x80008000u;
|
992 | ad69471c | pbrook | a &= ~0x80008000u;
|
993 | ad69471c | pbrook | b &= ~0x80008000u;
|
994 | ad69471c | pbrook | return (a + b) ^ mask;
|
995 | ad69471c | pbrook | } |
996 | ad69471c | pbrook | |
997 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 + src2
|
998 | ad69471c | pbrook | NEON_POP(padd_u8, neon_u8, 4)
|
999 | ad69471c | pbrook | NEON_POP(padd_u16, neon_u16, 2)
|
1000 | ad69471c | pbrook | #undef NEON_FN
|
1001 | ad69471c | pbrook | |
1002 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 - src2
|
1003 | ad69471c | pbrook | NEON_VOP(sub_u8, neon_u8, 4)
|
1004 | ad69471c | pbrook | NEON_VOP(sub_u16, neon_u16, 2)
|
1005 | ad69471c | pbrook | #undef NEON_FN
|
1006 | ad69471c | pbrook | |
1007 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 * src2
|
1008 | ad69471c | pbrook | NEON_VOP(mul_u8, neon_u8, 4)
|
1009 | ad69471c | pbrook | NEON_VOP(mul_u16, neon_u16, 2)
|
1010 | ad69471c | pbrook | #undef NEON_FN
|
1011 | ad69471c | pbrook | |
1012 | 1654b2d6 | aurel32 | /* Polynomial multiplication is like integer multiplication except the
|
1013 | ad69471c | pbrook | partial products are XORed, not added. */
|
1014 | ad69471c | pbrook | uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) |
1015 | ad69471c | pbrook | { |
1016 | ad69471c | pbrook | uint32_t mask; |
1017 | ad69471c | pbrook | uint32_t result; |
1018 | ad69471c | pbrook | result = 0;
|
1019 | ad69471c | pbrook | while (op1) {
|
1020 | ad69471c | pbrook | mask = 0;
|
1021 | ad69471c | pbrook | if (op1 & 1) |
1022 | ad69471c | pbrook | mask |= 0xff;
|
1023 | ad69471c | pbrook | if (op1 & (1 << 8)) |
1024 | ad69471c | pbrook | mask |= (0xff << 8); |
1025 | ad69471c | pbrook | if (op1 & (1 << 16)) |
1026 | ad69471c | pbrook | mask |= (0xff << 16); |
1027 | ad69471c | pbrook | if (op1 & (1 << 24)) |
1028 | ad69471c | pbrook | mask |= (0xff << 24); |
1029 | ad69471c | pbrook | result ^= op2 & mask; |
1030 | ad69471c | pbrook | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1031 | ad69471c | pbrook | op2 = (op2 << 1) & 0xfefefefe; |
1032 | ad69471c | pbrook | } |
1033 | ad69471c | pbrook | return result;
|
1034 | ad69471c | pbrook | } |
1035 | ad69471c | pbrook | |
1036 | e5ca24cb | Peter Maydell | uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) |
1037 | e5ca24cb | Peter Maydell | { |
1038 | e5ca24cb | Peter Maydell | uint64_t result = 0;
|
1039 | e5ca24cb | Peter Maydell | uint64_t mask; |
1040 | e5ca24cb | Peter Maydell | uint64_t op2ex = op2; |
1041 | e5ca24cb | Peter Maydell | op2ex = (op2ex & 0xff) |
|
1042 | e5ca24cb | Peter Maydell | ((op2ex & 0xff00) << 8) | |
1043 | e5ca24cb | Peter Maydell | ((op2ex & 0xff0000) << 16) | |
1044 | e5ca24cb | Peter Maydell | ((op2ex & 0xff000000) << 24); |
1045 | e5ca24cb | Peter Maydell | while (op1) {
|
1046 | e5ca24cb | Peter Maydell | mask = 0;
|
1047 | e5ca24cb | Peter Maydell | if (op1 & 1) { |
1048 | e5ca24cb | Peter Maydell | mask |= 0xffff;
|
1049 | e5ca24cb | Peter Maydell | } |
1050 | e5ca24cb | Peter Maydell | if (op1 & (1 << 8)) { |
1051 | e5ca24cb | Peter Maydell | mask |= (0xffffU << 16); |
1052 | e5ca24cb | Peter Maydell | } |
1053 | e5ca24cb | Peter Maydell | if (op1 & (1 << 16)) { |
1054 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 32); |
1055 | e5ca24cb | Peter Maydell | } |
1056 | e5ca24cb | Peter Maydell | if (op1 & (1 << 24)) { |
1057 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 48); |
1058 | e5ca24cb | Peter Maydell | } |
1059 | e5ca24cb | Peter Maydell | result ^= op2ex & mask; |
1060 | e5ca24cb | Peter Maydell | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1061 | e5ca24cb | Peter Maydell | op2ex <<= 1;
|
1062 | e5ca24cb | Peter Maydell | } |
1063 | e5ca24cb | Peter Maydell | return result;
|
1064 | e5ca24cb | Peter Maydell | } |
1065 | e5ca24cb | Peter Maydell | |
1066 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 |
1067 | ad69471c | pbrook | NEON_VOP(tst_u8, neon_u8, 4)
|
1068 | ad69471c | pbrook | NEON_VOP(tst_u16, neon_u16, 2)
|
1069 | ad69471c | pbrook | NEON_VOP(tst_u32, neon_u32, 1)
|
1070 | ad69471c | pbrook | #undef NEON_FN
|
1071 | ad69471c | pbrook | |
1072 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 |
1073 | ad69471c | pbrook | NEON_VOP(ceq_u8, neon_u8, 4)
|
1074 | ad69471c | pbrook | NEON_VOP(ceq_u16, neon_u16, 2)
|
1075 | ad69471c | pbrook | NEON_VOP(ceq_u32, neon_u32, 1)
|
1076 | ad69471c | pbrook | #undef NEON_FN
|
1077 | ad69471c | pbrook | |
1078 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src |
1079 | ad69471c | pbrook | NEON_VOP1(abs_s8, neon_s8, 4)
|
1080 | ad69471c | pbrook | NEON_VOP1(abs_s16, neon_s16, 2)
|
1081 | ad69471c | pbrook | #undef NEON_FN
|
1082 | ad69471c | pbrook | |
1083 | ad69471c | pbrook | /* Count Leading Sign/Zero Bits. */
|
/* Number of leading zero bits in an 8-bit value; returns 8 for zero. */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    while (x) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1091 | ad69471c | pbrook | |
/* Number of leading zero bits in a 16-bit value; returns 16 for zero. */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    while (x) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1099 | ad69471c | pbrook | |
1100 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
|
1101 | ad69471c | pbrook | NEON_VOP1(clz_u8, neon_u8, 4)
|
1102 | ad69471c | pbrook | #undef NEON_FN
|
1103 | ad69471c | pbrook | |
1104 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
|
1105 | ad69471c | pbrook | NEON_VOP1(clz_u16, neon_u16, 2)
|
1106 | ad69471c | pbrook | #undef NEON_FN
|
1107 | ad69471c | pbrook | |
1108 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 |
1109 | ad69471c | pbrook | NEON_VOP1(cls_s8, neon_s8, 4)
|
1110 | ad69471c | pbrook | #undef NEON_FN
|
1111 | ad69471c | pbrook | |
1112 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 |
1113 | ad69471c | pbrook | NEON_VOP1(cls_s16, neon_s16, 2)
|
1114 | ad69471c | pbrook | #undef NEON_FN
|
1115 | ad69471c | pbrook | |
1116 | ad69471c | pbrook | uint32_t HELPER(neon_cls_s32)(uint32_t x) |
1117 | ad69471c | pbrook | { |
1118 | ad69471c | pbrook | int count;
|
1119 | ad69471c | pbrook | if ((int32_t)x < 0) |
1120 | ad69471c | pbrook | x = ~x; |
1121 | ad69471c | pbrook | for (count = 32; x; count--) |
1122 | ad69471c | pbrook | x = x >> 1;
|
1123 | ad69471c | pbrook | return count - 1; |
1124 | ad69471c | pbrook | } |
1125 | ad69471c | pbrook | |
1126 | ad69471c | pbrook | /* Bit count. */
|
1127 | ad69471c | pbrook | uint32_t HELPER(neon_cnt_u8)(uint32_t x) |
1128 | ad69471c | pbrook | { |
1129 | ad69471c | pbrook | x = (x & 0x55555555) + ((x >> 1) & 0x55555555); |
1130 | ad69471c | pbrook | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
1131 | ad69471c | pbrook | x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); |
1132 | ad69471c | pbrook | return x;
|
1133 | ad69471c | pbrook | } |
1134 | ad69471c | pbrook | |
1135 | ad69471c | pbrook | #define NEON_QDMULH16(dest, src1, src2, round) do { \ |
1136 | ad69471c | pbrook | uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ |
1137 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ |
1138 | ad69471c | pbrook | SET_QC(); \ |
1139 | ad69471c | pbrook | tmp = (tmp >> 31) ^ ~SIGNBIT; \
|
1140 | 46eece9d | Juha Riihimäki | } else { \
|
1141 | 46eece9d | Juha Riihimäki | tmp <<= 1; \
|
1142 | ad69471c | pbrook | } \ |
1143 | ad69471c | pbrook | if (round) { \
|
1144 | ad69471c | pbrook | int32_t old = tmp; \ |
1145 | ad69471c | pbrook | tmp += 1 << 15; \ |
1146 | ad69471c | pbrook | if ((int32_t)tmp < old) { \
|
1147 | ad69471c | pbrook | SET_QC(); \ |
1148 | ad69471c | pbrook | tmp = SIGNBIT - 1; \
|
1149 | ad69471c | pbrook | } \ |
1150 | ad69471c | pbrook | } \ |
1151 | ad69471c | pbrook | dest = tmp >> 16; \
|
1152 | ad69471c | pbrook | } while(0) |
1153 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) |
1154 | 2a3f75b4 | Peter Maydell | NEON_VOP(qdmulh_s16, neon_s16, 2)
|
1155 | ad69471c | pbrook | #undef NEON_FN
|
1156 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) |
1157 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrdmulh_s16, neon_s16, 2)
|
1158 | ad69471c | pbrook | #undef NEON_FN
|
1159 | ad69471c | pbrook | #undef NEON_QDMULH16
|
1160 | ad69471c | pbrook | |
1161 | ad69471c | pbrook | #define NEON_QDMULH32(dest, src1, src2, round) do { \ |
1162 | ad69471c | pbrook | uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ |
1163 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ |
1164 | ad69471c | pbrook | SET_QC(); \ |
1165 | ad69471c | pbrook | tmp = (tmp >> 63) ^ ~SIGNBIT64; \
|
1166 | ad69471c | pbrook | } else { \
|
1167 | ad69471c | pbrook | tmp <<= 1; \
|
1168 | ad69471c | pbrook | } \ |
1169 | ad69471c | pbrook | if (round) { \
|
1170 | ad69471c | pbrook | int64_t old = tmp; \ |
1171 | ad69471c | pbrook | tmp += (int64_t)1 << 31; \ |
1172 | ad69471c | pbrook | if ((int64_t)tmp < old) { \
|
1173 | ad69471c | pbrook | SET_QC(); \ |
1174 | ad69471c | pbrook | tmp = SIGNBIT64 - 1; \
|
1175 | ad69471c | pbrook | } \ |
1176 | ad69471c | pbrook | } \ |
1177 | ad69471c | pbrook | dest = tmp >> 32; \
|
1178 | ad69471c | pbrook | } while(0) |
1179 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) |
1180 | 2a3f75b4 | Peter Maydell | NEON_VOP(qdmulh_s32, neon_s32, 1)
|
1181 | ad69471c | pbrook | #undef NEON_FN
|
1182 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) |
1183 | 2a3f75b4 | Peter Maydell | NEON_VOP(qrdmulh_s32, neon_s32, 1)
|
1184 | ad69471c | pbrook | #undef NEON_FN
|
1185 | ad69471c | pbrook | #undef NEON_QDMULH32
|
1186 | ad69471c | pbrook | |
1187 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u8)(uint64_t x) |
1188 | ad69471c | pbrook | { |
1189 | ad69471c | pbrook | return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) |
1190 | ad69471c | pbrook | | ((x >> 24) & 0xff000000u); |
1191 | ad69471c | pbrook | } |
1192 | ad69471c | pbrook | |
1193 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u16)(uint64_t x) |
1194 | ad69471c | pbrook | { |
1195 | ad69471c | pbrook | return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); |
1196 | ad69471c | pbrook | } |
1197 | ad69471c | pbrook | |
1198 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) |
1199 | ad69471c | pbrook | { |
1200 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1201 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1202 | ad69471c | pbrook | } |
1203 | ad69471c | pbrook | |
1204 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) |
1205 | ad69471c | pbrook | { |
1206 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1207 | ad69471c | pbrook | } |
1208 | ad69471c | pbrook | |
1209 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) |
1210 | ad69471c | pbrook | { |
1211 | ad69471c | pbrook | x &= 0xff80ff80ff80ff80ull;
|
1212 | ad69471c | pbrook | x += 0x0080008000800080ull;
|
1213 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1214 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1215 | ad69471c | pbrook | } |
1216 | ad69471c | pbrook | |
1217 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) |
1218 | ad69471c | pbrook | { |
1219 | ad69471c | pbrook | x &= 0xffff8000ffff8000ull;
|
1220 | ad69471c | pbrook | x += 0x0000800000008000ull;
|
1221 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1222 | ad69471c | pbrook | } |
1223 | ad69471c | pbrook | |
1224 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_unarrow_sat8)(uint64_t x) |
1225 | af1bbf30 | Juha Riihimäki | { |
1226 | af1bbf30 | Juha Riihimäki | uint16_t s; |
1227 | af1bbf30 | Juha Riihimäki | uint8_t d; |
1228 | af1bbf30 | Juha Riihimäki | uint32_t res = 0;
|
1229 | af1bbf30 | Juha Riihimäki | #define SAT8(n) \
|
1230 | af1bbf30 | Juha Riihimäki | s = x >> n; \ |
1231 | af1bbf30 | Juha Riihimäki | if (s & 0x8000) { \ |
1232 | af1bbf30 | Juha Riihimäki | SET_QC(); \ |
1233 | af1bbf30 | Juha Riihimäki | } else { \
|
1234 | af1bbf30 | Juha Riihimäki | if (s > 0xff) { \ |
1235 | af1bbf30 | Juha Riihimäki | d = 0xff; \
|
1236 | af1bbf30 | Juha Riihimäki | SET_QC(); \ |
1237 | af1bbf30 | Juha Riihimäki | } else { \
|
1238 | af1bbf30 | Juha Riihimäki | d = s; \ |
1239 | af1bbf30 | Juha Riihimäki | } \ |
1240 | af1bbf30 | Juha Riihimäki | res |= (uint32_t)d << (n / 2); \
|
1241 | af1bbf30 | Juha Riihimäki | } |
1242 | af1bbf30 | Juha Riihimäki | |
1243 | af1bbf30 | Juha Riihimäki | SAT8(0);
|
1244 | af1bbf30 | Juha Riihimäki | SAT8(16);
|
1245 | af1bbf30 | Juha Riihimäki | SAT8(32);
|
1246 | af1bbf30 | Juha Riihimäki | SAT8(48);
|
1247 | af1bbf30 | Juha Riihimäki | #undef SAT8
|
1248 | af1bbf30 | Juha Riihimäki | return res;
|
1249 | af1bbf30 | Juha Riihimäki | } |
1250 | af1bbf30 | Juha Riihimäki | |
1251 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u8)(uint64_t x) |
1252 | ad69471c | pbrook | { |
1253 | ad69471c | pbrook | uint16_t s; |
1254 | ad69471c | pbrook | uint8_t d; |
1255 | ad69471c | pbrook | uint32_t res = 0;
|
1256 | ad69471c | pbrook | #define SAT8(n) \
|
1257 | ad69471c | pbrook | s = x >> n; \ |
1258 | ad69471c | pbrook | if (s > 0xff) { \ |
1259 | ad69471c | pbrook | d = 0xff; \
|
1260 | ad69471c | pbrook | SET_QC(); \ |
1261 | ad69471c | pbrook | } else { \
|
1262 | ad69471c | pbrook | d = s; \ |
1263 | ad69471c | pbrook | } \ |
1264 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1265 | ad69471c | pbrook | |
1266 | ad69471c | pbrook | SAT8(0);
|
1267 | ad69471c | pbrook | SAT8(16);
|
1268 | ad69471c | pbrook | SAT8(32);
|
1269 | ad69471c | pbrook | SAT8(48);
|
1270 | ad69471c | pbrook | #undef SAT8
|
1271 | ad69471c | pbrook | return res;
|
1272 | ad69471c | pbrook | } |
1273 | ad69471c | pbrook | |
1274 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s8)(uint64_t x) |
1275 | ad69471c | pbrook | { |
1276 | ad69471c | pbrook | int16_t s; |
1277 | ad69471c | pbrook | uint8_t d; |
1278 | ad69471c | pbrook | uint32_t res = 0;
|
1279 | ad69471c | pbrook | #define SAT8(n) \
|
1280 | ad69471c | pbrook | s = x >> n; \ |
1281 | ad69471c | pbrook | if (s != (int8_t)s) { \
|
1282 | ad69471c | pbrook | d = (s >> 15) ^ 0x7f; \ |
1283 | ad69471c | pbrook | SET_QC(); \ |
1284 | ad69471c | pbrook | } else { \
|
1285 | ad69471c | pbrook | d = s; \ |
1286 | ad69471c | pbrook | } \ |
1287 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1288 | ad69471c | pbrook | |
1289 | ad69471c | pbrook | SAT8(0);
|
1290 | ad69471c | pbrook | SAT8(16);
|
1291 | ad69471c | pbrook | SAT8(32);
|
1292 | ad69471c | pbrook | SAT8(48);
|
1293 | ad69471c | pbrook | #undef SAT8
|
1294 | ad69471c | pbrook | return res;
|
1295 | ad69471c | pbrook | } |
1296 | ad69471c | pbrook | |
1297 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_unarrow_sat16)(uint64_t x) |
1298 | af1bbf30 | Juha Riihimäki | { |
1299 | af1bbf30 | Juha Riihimäki | uint32_t high; |
1300 | af1bbf30 | Juha Riihimäki | uint32_t low; |
1301 | af1bbf30 | Juha Riihimäki | low = x; |
1302 | af1bbf30 | Juha Riihimäki | if (low & 0x80000000) { |
1303 | af1bbf30 | Juha Riihimäki | low = 0;
|
1304 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1305 | af1bbf30 | Juha Riihimäki | } else if (low > 0xffff) { |
1306 | af1bbf30 | Juha Riihimäki | low = 0xffff;
|
1307 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1308 | af1bbf30 | Juha Riihimäki | } |
1309 | af1bbf30 | Juha Riihimäki | high = x >> 32;
|
1310 | af1bbf30 | Juha Riihimäki | if (high & 0x80000000) { |
1311 | af1bbf30 | Juha Riihimäki | high = 0;
|
1312 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1313 | af1bbf30 | Juha Riihimäki | } else if (high > 0xffff) { |
1314 | af1bbf30 | Juha Riihimäki | high = 0xffff;
|
1315 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1316 | af1bbf30 | Juha Riihimäki | } |
1317 | af1bbf30 | Juha Riihimäki | return low | (high << 16); |
1318 | af1bbf30 | Juha Riihimäki | } |
1319 | af1bbf30 | Juha Riihimäki | |
1320 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u16)(uint64_t x) |
1321 | ad69471c | pbrook | { |
1322 | ad69471c | pbrook | uint32_t high; |
1323 | ad69471c | pbrook | uint32_t low; |
1324 | ad69471c | pbrook | low = x; |
1325 | ad69471c | pbrook | if (low > 0xffff) { |
1326 | ad69471c | pbrook | low = 0xffff;
|
1327 | ad69471c | pbrook | SET_QC(); |
1328 | ad69471c | pbrook | } |
1329 | ad69471c | pbrook | high = x >> 32;
|
1330 | ad69471c | pbrook | if (high > 0xffff) { |
1331 | ad69471c | pbrook | high = 0xffff;
|
1332 | ad69471c | pbrook | SET_QC(); |
1333 | ad69471c | pbrook | } |
1334 | ad69471c | pbrook | return low | (high << 16); |
1335 | ad69471c | pbrook | } |
1336 | ad69471c | pbrook | |
1337 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s16)(uint64_t x) |
1338 | ad69471c | pbrook | { |
1339 | ad69471c | pbrook | int32_t low; |
1340 | ad69471c | pbrook | int32_t high; |
1341 | ad69471c | pbrook | low = x; |
1342 | ad69471c | pbrook | if (low != (int16_t)low) {
|
1343 | ad69471c | pbrook | low = (low >> 31) ^ 0x7fff; |
1344 | ad69471c | pbrook | SET_QC(); |
1345 | ad69471c | pbrook | } |
1346 | ad69471c | pbrook | high = x >> 32;
|
1347 | ad69471c | pbrook | if (high != (int16_t)high) {
|
1348 | ad69471c | pbrook | high = (high >> 31) ^ 0x7fff; |
1349 | ad69471c | pbrook | SET_QC(); |
1350 | ad69471c | pbrook | } |
1351 | ad69471c | pbrook | return (uint16_t)low | (high << 16); |
1352 | ad69471c | pbrook | } |
1353 | ad69471c | pbrook | |
1354 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_unarrow_sat32)(uint64_t x) |
1355 | af1bbf30 | Juha Riihimäki | { |
1356 | af1bbf30 | Juha Riihimäki | if (x & 0x8000000000000000ull) { |
1357 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1358 | af1bbf30 | Juha Riihimäki | return 0; |
1359 | af1bbf30 | Juha Riihimäki | } |
1360 | af1bbf30 | Juha Riihimäki | if (x > 0xffffffffu) { |
1361 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1362 | af1bbf30 | Juha Riihimäki | return 0xffffffffu; |
1363 | af1bbf30 | Juha Riihimäki | } |
1364 | af1bbf30 | Juha Riihimäki | return x;
|
1365 | af1bbf30 | Juha Riihimäki | } |
1366 | af1bbf30 | Juha Riihimäki | |
1367 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u32)(uint64_t x) |
1368 | ad69471c | pbrook | { |
1369 | ad69471c | pbrook | if (x > 0xffffffffu) { |
1370 | ad69471c | pbrook | SET_QC(); |
1371 | ad69471c | pbrook | return 0xffffffffu; |
1372 | ad69471c | pbrook | } |
1373 | ad69471c | pbrook | return x;
|
1374 | ad69471c | pbrook | } |
1375 | ad69471c | pbrook | |
1376 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s32)(uint64_t x) |
1377 | ad69471c | pbrook | { |
1378 | ad69471c | pbrook | if ((int64_t)x != (int32_t)x) {
|
1379 | ad69471c | pbrook | SET_QC(); |
1380 | cc2212c2 | Peter Maydell | return ((int64_t)x >> 63) ^ 0x7fffffff; |
1381 | ad69471c | pbrook | } |
1382 | ad69471c | pbrook | return x;
|
1383 | ad69471c | pbrook | } |
1384 | ad69471c | pbrook | |
1385 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u8)(uint32_t x) |
1386 | ad69471c | pbrook | { |
1387 | ad69471c | pbrook | uint64_t tmp; |
1388 | ad69471c | pbrook | uint64_t ret; |
1389 | ad69471c | pbrook | ret = (uint8_t)x; |
1390 | ad69471c | pbrook | tmp = (uint8_t)(x >> 8);
|
1391 | ad69471c | pbrook | ret |= tmp << 16;
|
1392 | ad69471c | pbrook | tmp = (uint8_t)(x >> 16);
|
1393 | ad69471c | pbrook | ret |= tmp << 32;
|
1394 | ad69471c | pbrook | tmp = (uint8_t)(x >> 24);
|
1395 | ad69471c | pbrook | ret |= tmp << 48;
|
1396 | ad69471c | pbrook | return ret;
|
1397 | ad69471c | pbrook | } |
1398 | ad69471c | pbrook | |
1399 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s8)(uint32_t x) |
1400 | ad69471c | pbrook | { |
1401 | ad69471c | pbrook | uint64_t tmp; |
1402 | ad69471c | pbrook | uint64_t ret; |
1403 | ad69471c | pbrook | ret = (uint16_t)(int8_t)x; |
1404 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 8);
|
1405 | ad69471c | pbrook | ret |= tmp << 16;
|
1406 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 16);
|
1407 | ad69471c | pbrook | ret |= tmp << 32;
|
1408 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 24);
|
1409 | ad69471c | pbrook | ret |= tmp << 48;
|
1410 | ad69471c | pbrook | return ret;
|
1411 | ad69471c | pbrook | } |
1412 | ad69471c | pbrook | |
1413 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u16)(uint32_t x) |
1414 | ad69471c | pbrook | { |
1415 | ad69471c | pbrook | uint64_t high = (uint16_t)(x >> 16);
|
1416 | ad69471c | pbrook | return ((uint16_t)x) | (high << 32); |
1417 | ad69471c | pbrook | } |
1418 | ad69471c | pbrook | |
1419 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s16)(uint32_t x) |
1420 | ad69471c | pbrook | { |
1421 | ad69471c | pbrook | uint64_t high = (int16_t)(x >> 16);
|
1422 | ad69471c | pbrook | return ((uint32_t)(int16_t)x) | (high << 32); |
1423 | ad69471c | pbrook | } |
1424 | ad69471c | pbrook | |
1425 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) |
1426 | ad69471c | pbrook | { |
1427 | ad69471c | pbrook | uint64_t mask; |
1428 | ad69471c | pbrook | mask = (a ^ b) & 0x8000800080008000ull;
|
1429 | ad69471c | pbrook | a &= ~0x8000800080008000ull;
|
1430 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1431 | ad69471c | pbrook | return (a + b) ^ mask;
|
1432 | ad69471c | pbrook | } |
1433 | ad69471c | pbrook | |
1434 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) |
1435 | ad69471c | pbrook | { |
1436 | ad69471c | pbrook | uint64_t mask; |
1437 | ad69471c | pbrook | mask = (a ^ b) & 0x8000000080000000ull;
|
1438 | ad69471c | pbrook | a &= ~0x8000000080000000ull;
|
1439 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1440 | ad69471c | pbrook | return (a + b) ^ mask;
|
1441 | ad69471c | pbrook | } |
1442 | ad69471c | pbrook | |
1443 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) |
1444 | ad69471c | pbrook | { |
1445 | ad69471c | pbrook | uint64_t tmp; |
1446 | ad69471c | pbrook | uint64_t tmp2; |
1447 | ad69471c | pbrook | |
1448 | ad69471c | pbrook | tmp = a & 0x0000ffff0000ffffull;
|
1449 | ad69471c | pbrook | tmp += (a >> 16) & 0x0000ffff0000ffffull; |
1450 | ad69471c | pbrook | tmp2 = b & 0xffff0000ffff0000ull;
|
1451 | ad69471c | pbrook | tmp2 += (b << 16) & 0xffff0000ffff0000ull; |
1452 | ad69471c | pbrook | return ( tmp & 0xffff) |
1453 | ad69471c | pbrook | | ((tmp >> 16) & 0xffff0000ull) |
1454 | ad69471c | pbrook | | ((tmp2 << 16) & 0xffff00000000ull) |
1455 | ad69471c | pbrook | | ( tmp2 & 0xffff000000000000ull);
|
1456 | ad69471c | pbrook | } |
1457 | ad69471c | pbrook | |
1458 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) |
1459 | ad69471c | pbrook | { |
1460 | ad69471c | pbrook | uint32_t low = a + (a >> 32);
|
1461 | ad69471c | pbrook | uint32_t high = b + (b >> 32);
|
1462 | ad69471c | pbrook | return low + ((uint64_t)high << 32); |
1463 | ad69471c | pbrook | } |
1464 | ad69471c | pbrook | |
1465 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) |
1466 | ad69471c | pbrook | { |
1467 | ad69471c | pbrook | uint64_t mask; |
1468 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000800080008000ull;
|
1469 | ad69471c | pbrook | a |= 0x8000800080008000ull;
|
1470 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1471 | ad69471c | pbrook | return (a - b) ^ mask;
|
1472 | ad69471c | pbrook | } |
1473 | ad69471c | pbrook | |
1474 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) |
1475 | ad69471c | pbrook | { |
1476 | ad69471c | pbrook | uint64_t mask; |
1477 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000000080000000ull;
|
1478 | ad69471c | pbrook | a |= 0x8000000080000000ull;
|
1479 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1480 | ad69471c | pbrook | return (a - b) ^ mask;
|
1481 | ad69471c | pbrook | } |
1482 | ad69471c | pbrook | |
1483 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_addl_saturate_s32)(uint64_t a, uint64_t b) |
1484 | ad69471c | pbrook | { |
1485 | ad69471c | pbrook | uint32_t x, y; |
1486 | ad69471c | pbrook | uint32_t low, high; |
1487 | ad69471c | pbrook | |
1488 | ad69471c | pbrook | x = a; |
1489 | ad69471c | pbrook | y = b; |
1490 | ad69471c | pbrook | low = x + y; |
1491 | ad69471c | pbrook | if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1492 | ad69471c | pbrook | SET_QC(); |
1493 | ad69471c | pbrook | low = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1494 | ad69471c | pbrook | } |
1495 | ad69471c | pbrook | x = a >> 32;
|
1496 | ad69471c | pbrook | y = b >> 32;
|
1497 | ad69471c | pbrook | high = x + y; |
1498 | ad69471c | pbrook | if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1499 | ad69471c | pbrook | SET_QC(); |
1500 | ad69471c | pbrook | high = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1501 | ad69471c | pbrook | } |
1502 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1503 | ad69471c | pbrook | } |
1504 | ad69471c | pbrook | |
1505 | 2a3f75b4 | Peter Maydell | uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b) |
1506 | ad69471c | pbrook | { |
1507 | ad69471c | pbrook | uint64_t result; |
1508 | ad69471c | pbrook | |
1509 | ad69471c | pbrook | result = a + b; |
1510 | ad69471c | pbrook | if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
|
1511 | ad69471c | pbrook | SET_QC(); |
1512 | ad69471c | pbrook | result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
|
1513 | ad69471c | pbrook | } |
1514 | ad69471c | pbrook | return result;
|
1515 | ad69471c | pbrook | } |
1516 | ad69471c | pbrook | |
/* Absolute difference of x and y, each first truncated to "intype".
 * The subtraction is carried out in the wider "arithtype" because the
 * difference may not fit the input type: for example the absolute
 * difference of two signed 32 bit values can overflow a signed 32 bit
 * value.
 */
#define DO_ABD(dest, x, y, intype, arithtype) do {            \
    const arithtype abd_lhs = (intype)(x);                    \
    const arithtype abd_rhs = (intype)(y);                    \
    if (abd_lhs > abd_rhs) {                                  \
        dest = abd_lhs - abd_rhs;                             \
    } else {                                                  \
        dest = abd_rhs - abd_lhs;                             \
    }                                                         \
    } while(0)
1526 | ad69471c | pbrook | |
1527 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) |
1528 | ad69471c | pbrook | { |
1529 | ad69471c | pbrook | uint64_t tmp; |
1530 | ad69471c | pbrook | uint64_t result; |
1531 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint8_t, uint32_t); |
1532 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t); |
1533 | ad69471c | pbrook | result |= tmp << 16;
|
1534 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t); |
1535 | ad69471c | pbrook | result |= tmp << 32;
|
1536 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t); |
1537 | ad69471c | pbrook | result |= tmp << 48;
|
1538 | ad69471c | pbrook | return result;
|
1539 | ad69471c | pbrook | } |
1540 | ad69471c | pbrook | |
1541 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) |
1542 | ad69471c | pbrook | { |
1543 | ad69471c | pbrook | uint64_t tmp; |
1544 | ad69471c | pbrook | uint64_t result; |
1545 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int8_t, int32_t); |
1546 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t); |
1547 | ad69471c | pbrook | result |= tmp << 16;
|
1548 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t); |
1549 | ad69471c | pbrook | result |= tmp << 32;
|
1550 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t); |
1551 | ad69471c | pbrook | result |= tmp << 48;
|
1552 | ad69471c | pbrook | return result;
|
1553 | ad69471c | pbrook | } |
1554 | ad69471c | pbrook | |
1555 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) |
1556 | ad69471c | pbrook | { |
1557 | ad69471c | pbrook | uint64_t tmp; |
1558 | ad69471c | pbrook | uint64_t result; |
1559 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint16_t, uint32_t); |
1560 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1561 | ad69471c | pbrook | return result | (tmp << 32); |
1562 | ad69471c | pbrook | } |
1563 | ad69471c | pbrook | |
1564 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) |
1565 | ad69471c | pbrook | { |
1566 | ad69471c | pbrook | uint64_t tmp; |
1567 | ad69471c | pbrook | uint64_t result; |
1568 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int16_t, int32_t); |
1569 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t); |
1570 | ad69471c | pbrook | return result | (tmp << 32); |
1571 | ad69471c | pbrook | } |
1572 | ad69471c | pbrook | |
1573 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) |
1574 | ad69471c | pbrook | { |
1575 | ad69471c | pbrook | uint64_t result; |
1576 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint32_t, uint64_t); |
1577 | ad69471c | pbrook | return result;
|
1578 | ad69471c | pbrook | } |
1579 | ad69471c | pbrook | |
1580 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) |
1581 | ad69471c | pbrook | { |
1582 | ad69471c | pbrook | uint64_t result; |
1583 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int32_t, int64_t); |
1584 | ad69471c | pbrook | return result;
|
1585 | ad69471c | pbrook | } |
1586 | ad69471c | pbrook | #undef DO_ABD
|
1587 | ad69471c | pbrook | |
/* Widening multiply.  Named type is the source type.
 *
 * x and y are truncated to type1 (the source element type), converted to
 * type2 (the unsigned result element type) and multiplied; the product is
 * truncated to type2 and stored in dest.
 *
 * The leading "1u *" forces the multiplication into unsigned arithmetic.
 * Without it a 16-bit type2 promotes both operands to signed int, and a
 * product such as 0xffff * 0xffff (sign-extended -1 * -1) overflows int,
 * which is undefined behaviour.
 */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 tmp_x = x; \
    type1 tmp_y = y; \
    dest = (type2)(1u * (type2)tmp_x * (type2)tmp_y); \
    } while(0)
1594 | ad69471c | pbrook | |
1595 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) |
1596 | ad69471c | pbrook | { |
1597 | ad69471c | pbrook | uint64_t tmp; |
1598 | ad69471c | pbrook | uint64_t result; |
1599 | ad69471c | pbrook | |
1600 | ad69471c | pbrook | DO_MULL(result, a, b, uint8_t, uint16_t); |
1601 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); |
1602 | ad69471c | pbrook | result |= tmp << 16;
|
1603 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); |
1604 | ad69471c | pbrook | result |= tmp << 32;
|
1605 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); |
1606 | ad69471c | pbrook | result |= tmp << 48;
|
1607 | ad69471c | pbrook | return result;
|
1608 | ad69471c | pbrook | } |
1609 | ad69471c | pbrook | |
1610 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) |
1611 | ad69471c | pbrook | { |
1612 | ad69471c | pbrook | uint64_t tmp; |
1613 | ad69471c | pbrook | uint64_t result; |
1614 | ad69471c | pbrook | |
1615 | ad69471c | pbrook | DO_MULL(result, a, b, int8_t, uint16_t); |
1616 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); |
1617 | ad69471c | pbrook | result |= tmp << 16;
|
1618 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); |
1619 | ad69471c | pbrook | result |= tmp << 32;
|
1620 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); |
1621 | ad69471c | pbrook | result |= tmp << 48;
|
1622 | ad69471c | pbrook | return result;
|
1623 | ad69471c | pbrook | } |
1624 | ad69471c | pbrook | |
1625 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) |
1626 | ad69471c | pbrook | { |
1627 | ad69471c | pbrook | uint64_t tmp; |
1628 | ad69471c | pbrook | uint64_t result; |
1629 | ad69471c | pbrook | |
1630 | ad69471c | pbrook | DO_MULL(result, a, b, uint16_t, uint32_t); |
1631 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1632 | ad69471c | pbrook | return result | (tmp << 32); |
1633 | ad69471c | pbrook | } |
1634 | ad69471c | pbrook | |
1635 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) |
1636 | ad69471c | pbrook | { |
1637 | ad69471c | pbrook | uint64_t tmp; |
1638 | ad69471c | pbrook | uint64_t result; |
1639 | ad69471c | pbrook | |
1640 | ad69471c | pbrook | DO_MULL(result, a, b, int16_t, uint32_t); |
1641 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); |
1642 | ad69471c | pbrook | return result | (tmp << 32); |
1643 | ad69471c | pbrook | } |
1644 | ad69471c | pbrook | |
1645 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u16)(uint64_t x) |
1646 | ad69471c | pbrook | { |
1647 | ad69471c | pbrook | uint16_t tmp; |
1648 | ad69471c | pbrook | uint64_t result; |
1649 | ad69471c | pbrook | result = (uint16_t)-x; |
1650 | ad69471c | pbrook | tmp = -(x >> 16);
|
1651 | ad69471c | pbrook | result |= (uint64_t)tmp << 16;
|
1652 | ad69471c | pbrook | tmp = -(x >> 32);
|
1653 | ad69471c | pbrook | result |= (uint64_t)tmp << 32;
|
1654 | ad69471c | pbrook | tmp = -(x >> 48);
|
1655 | ad69471c | pbrook | result |= (uint64_t)tmp << 48;
|
1656 | ad69471c | pbrook | return result;
|
1657 | ad69471c | pbrook | } |
1658 | ad69471c | pbrook | |
1659 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u32)(uint64_t x) |
1660 | ad69471c | pbrook | { |
1661 | ad69471c | pbrook | uint32_t low = -x; |
1662 | ad69471c | pbrook | uint32_t high = -(x >> 32);
|
1663 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1664 | ad69471c | pbrook | } |
1665 | ad69471c | pbrook | |
1666 | ad69471c | pbrook | /* FIXME: There should be a native op for this. */
|
1667 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u64)(uint64_t x) |
1668 | ad69471c | pbrook | { |
1669 | ad69471c | pbrook | return -x;
|
1670 | ad69471c | pbrook | } |
1671 | ad69471c | pbrook | |
1672 | ad69471c | pbrook | /* Saturating sign manipulation. */
|
1673 | ad69471c | pbrook | /* ??? Make these use NEON_VOP1 */
|
1674 | ad69471c | pbrook | #define DO_QABS8(x) do { \ |
1675 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1676 | ad69471c | pbrook | x = 0x7f; \
|
1677 | ad69471c | pbrook | SET_QC(); \ |
1678 | ad69471c | pbrook | } else if (x < 0) { \ |
1679 | ad69471c | pbrook | x = -x; \ |
1680 | ad69471c | pbrook | }} while (0) |
1681 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qabs_s8)(uint32_t x) |
1682 | ad69471c | pbrook | { |
1683 | ad69471c | pbrook | neon_s8 vec; |
1684 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1685 | ad69471c | pbrook | DO_QABS8(vec.v1); |
1686 | ad69471c | pbrook | DO_QABS8(vec.v2); |
1687 | ad69471c | pbrook | DO_QABS8(vec.v3); |
1688 | ad69471c | pbrook | DO_QABS8(vec.v4); |
1689 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1690 | ad69471c | pbrook | return x;
|
1691 | ad69471c | pbrook | } |
1692 | ad69471c | pbrook | #undef DO_QABS8
|
1693 | ad69471c | pbrook | |
1694 | ad69471c | pbrook | #define DO_QNEG8(x) do { \ |
1695 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1696 | ad69471c | pbrook | x = 0x7f; \
|
1697 | ad69471c | pbrook | SET_QC(); \ |
1698 | ad69471c | pbrook | } else { \
|
1699 | ad69471c | pbrook | x = -x; \ |
1700 | ad69471c | pbrook | }} while (0) |
1701 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qneg_s8)(uint32_t x) |
1702 | ad69471c | pbrook | { |
1703 | ad69471c | pbrook | neon_s8 vec; |
1704 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1705 | ad69471c | pbrook | DO_QNEG8(vec.v1); |
1706 | ad69471c | pbrook | DO_QNEG8(vec.v2); |
1707 | ad69471c | pbrook | DO_QNEG8(vec.v3); |
1708 | ad69471c | pbrook | DO_QNEG8(vec.v4); |
1709 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1710 | ad69471c | pbrook | return x;
|
1711 | ad69471c | pbrook | } |
1712 | ad69471c | pbrook | #undef DO_QNEG8
|
1713 | ad69471c | pbrook | |
1714 | ad69471c | pbrook | #define DO_QABS16(x) do { \ |
1715 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1716 | ad69471c | pbrook | x = 0x7fff; \
|
1717 | ad69471c | pbrook | SET_QC(); \ |
1718 | ad69471c | pbrook | } else if (x < 0) { \ |
1719 | ad69471c | pbrook | x = -x; \ |
1720 | ad69471c | pbrook | }} while (0) |
1721 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qabs_s16)(uint32_t x) |
1722 | ad69471c | pbrook | { |
1723 | ad69471c | pbrook | neon_s16 vec; |
1724 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1725 | ad69471c | pbrook | DO_QABS16(vec.v1); |
1726 | ad69471c | pbrook | DO_QABS16(vec.v2); |
1727 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1728 | ad69471c | pbrook | return x;
|
1729 | ad69471c | pbrook | } |
1730 | ad69471c | pbrook | #undef DO_QABS16
|
1731 | ad69471c | pbrook | |
1732 | ad69471c | pbrook | #define DO_QNEG16(x) do { \ |
1733 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1734 | ad69471c | pbrook | x = 0x7fff; \
|
1735 | ad69471c | pbrook | SET_QC(); \ |
1736 | ad69471c | pbrook | } else { \
|
1737 | ad69471c | pbrook | x = -x; \ |
1738 | ad69471c | pbrook | }} while (0) |
1739 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qneg_s16)(uint32_t x) |
1740 | ad69471c | pbrook | { |
1741 | ad69471c | pbrook | neon_s16 vec; |
1742 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1743 | ad69471c | pbrook | DO_QNEG16(vec.v1); |
1744 | ad69471c | pbrook | DO_QNEG16(vec.v2); |
1745 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1746 | ad69471c | pbrook | return x;
|
1747 | ad69471c | pbrook | } |
1748 | ad69471c | pbrook | #undef DO_QNEG16
|
1749 | ad69471c | pbrook | |
1750 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qabs_s32)(uint32_t x) |
1751 | ad69471c | pbrook | { |
1752 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1753 | ad69471c | pbrook | SET_QC(); |
1754 | ad69471c | pbrook | x = ~SIGNBIT; |
1755 | ad69471c | pbrook | } else if ((int32_t)x < 0) { |
1756 | ad69471c | pbrook | x = -x; |
1757 | ad69471c | pbrook | } |
1758 | ad69471c | pbrook | return x;
|
1759 | ad69471c | pbrook | } |
1760 | ad69471c | pbrook | |
1761 | 2a3f75b4 | Peter Maydell | uint32_t HELPER(neon_qneg_s32)(uint32_t x) |
1762 | ad69471c | pbrook | { |
1763 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1764 | ad69471c | pbrook | SET_QC(); |
1765 | ad69471c | pbrook | x = ~SIGNBIT; |
1766 | ad69471c | pbrook | } else {
|
1767 | ad69471c | pbrook | x = -x; |
1768 | ad69471c | pbrook | } |
1769 | ad69471c | pbrook | return x;
|
1770 | ad69471c | pbrook | } |
1771 | ad69471c | pbrook | |
1772 | ad69471c | pbrook | /* NEON Float helpers. */
|
1773 | ad69471c | pbrook | uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) |
1774 | ad69471c | pbrook | { |
1775 | 4a9f9cb2 | Peter Maydell | return float32_val(float32_min(make_float32(a), make_float32(b), NFS));
|
1776 | ad69471c | pbrook | } |
1777 | ad69471c | pbrook | |
1778 | ad69471c | pbrook | uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) |
1779 | ad69471c | pbrook | { |
1780 | 4a9f9cb2 | Peter Maydell | return float32_val(float32_max(make_float32(a), make_float32(b), NFS));
|
1781 | ad69471c | pbrook | } |
1782 | ad69471c | pbrook | |
1783 | ad69471c | pbrook | uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) |
1784 | ad69471c | pbrook | { |
1785 | 51d85267 | Peter Maydell | float32 f0 = make_float32(a); |
1786 | 51d85267 | Peter Maydell | float32 f1 = make_float32(b); |
1787 | 79c18be7 | Peter Maydell | return float32_val(float32_abs(float32_sub(f0, f1, NFS)));
|
1788 | ad69471c | pbrook | } |
1789 | ad69471c | pbrook | |
1790 | ad69471c | pbrook | uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) |
1791 | ad69471c | pbrook | { |
1792 | 51d85267 | Peter Maydell | return float32_val(float32_add(make_float32(a), make_float32(b), NFS));
|
1793 | ad69471c | pbrook | } |
1794 | ad69471c | pbrook | |
1795 | ad69471c | pbrook | uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) |
1796 | ad69471c | pbrook | { |
1797 | 51d85267 | Peter Maydell | return float32_val(float32_sub(make_float32(a), make_float32(b), NFS));
|
1798 | ad69471c | pbrook | } |
1799 | ad69471c | pbrook | |
1800 | ad69471c | pbrook | uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) |
1801 | ad69471c | pbrook | { |
1802 | 51d85267 | Peter Maydell | return float32_val(float32_mul(make_float32(a), make_float32(b), NFS));
|
1803 | ad69471c | pbrook | } |
1804 | ad69471c | pbrook | |
1805 | ad69471c | pbrook | /* Floating point comparisons produce an integer result. */
|
1806 | c7498dae | Peter Maydell | #define NEON_VOP_FCMP(name, ok) \
|
1807 | ad69471c | pbrook | uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ |
1808 | ad69471c | pbrook | { \ |
1809 | c7498dae | Peter Maydell | switch (float32_compare_quiet(make_float32(a), make_float32(b), NFS)) { \
|
1810 | c7498dae | Peter Maydell | ok return ~0; \ |
1811 | c7498dae | Peter Maydell | default: return 0; \ |
1812 | 51d85267 | Peter Maydell | } \ |
1813 | ad69471c | pbrook | } |
1814 | ad69471c | pbrook | |
1815 | c7498dae | Peter Maydell | NEON_VOP_FCMP(ceq_f32, case float_relation_equal:)
|
1816 | c7498dae | Peter Maydell | NEON_VOP_FCMP(cge_f32, case float_relation_equal: case float_relation_greater:) |
1817 | c7498dae | Peter Maydell | NEON_VOP_FCMP(cgt_f32, case float_relation_greater:)
|
1818 | ad69471c | pbrook | |
1819 | ad69471c | pbrook | uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) |
1820 | ad69471c | pbrook | { |
1821 | 51d85267 | Peter Maydell | float32 f0 = float32_abs(make_float32(a)); |
1822 | 51d85267 | Peter Maydell | float32 f1 = float32_abs(make_float32(b)); |
1823 | c7498dae | Peter Maydell | switch (float32_compare_quiet(f0, f1, NFS)) {
|
1824 | c7498dae | Peter Maydell | case float_relation_equal:
|
1825 | c7498dae | Peter Maydell | case float_relation_greater:
|
1826 | c7498dae | Peter Maydell | return ~0; |
1827 | c7498dae | Peter Maydell | default:
|
1828 | c7498dae | Peter Maydell | return 0; |
1829 | c7498dae | Peter Maydell | } |
1830 | ad69471c | pbrook | } |
1831 | ad69471c | pbrook | |
1832 | ad69471c | pbrook | uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) |
1833 | ad69471c | pbrook | { |
1834 | 51d85267 | Peter Maydell | float32 f0 = float32_abs(make_float32(a)); |
1835 | 51d85267 | Peter Maydell | float32 f1 = float32_abs(make_float32(b)); |
1836 | c7498dae | Peter Maydell | if (float32_compare_quiet(f0, f1, NFS) == float_relation_greater) {
|
1837 | c7498dae | Peter Maydell | return ~0; |
1838 | c7498dae | Peter Maydell | } |
1839 | c7498dae | Peter Maydell | return 0; |
1840 | ad69471c | pbrook | } |
1841 | 02acedf9 | Peter Maydell | |
/* Extract element number N (counting from the least significant end) of
 * width SIZE bits from the integer V.
 */
#define ELEM(V, N, SIZE) \
    (((1ull << (SIZE)) - 1) & ((V) >> ((SIZE) * (N))))
1843 | 02acedf9 | Peter Maydell | |
1844 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qunzip8)(uint32_t rd, uint32_t rm)
|
1845 | 02acedf9 | Peter Maydell | { |
1846 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1847 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1848 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1849 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1850 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) |
1851 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) |
1852 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) |
1853 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); |
1854 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) |
1855 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) |
1856 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1857 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); |
1858 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) |
1859 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) |
1860 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) |
1861 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); |
1862 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) |
1863 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) |
1864 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) |
1865 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1866 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1867 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1868 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1869 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1870 | 02acedf9 | Peter Maydell | } |
1871 | 02acedf9 | Peter Maydell | |
1872 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qunzip16)(uint32_t rd, uint32_t rm)
|
1873 | 02acedf9 | Peter Maydell | { |
1874 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1875 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1876 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1877 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1878 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) |
1879 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); |
1880 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) |
1881 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); |
1882 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) |
1883 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); |
1884 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) |
1885 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1886 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1887 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1888 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1889 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1890 | 02acedf9 | Peter Maydell | } |
1891 | 02acedf9 | Peter Maydell | |
1892 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qunzip32)(uint32_t rd, uint32_t rm)
|
1893 | 02acedf9 | Peter Maydell | { |
1894 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1895 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1896 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1897 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1898 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); |
1899 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1900 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); |
1901 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1902 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1903 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1904 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1905 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1906 | 02acedf9 | Peter Maydell | } |
1907 | 02acedf9 | Peter Maydell | |
1908 | 2a3f75b4 | Peter Maydell | void HELPER(neon_unzip8)(uint32_t rd, uint32_t rm)
|
1909 | 02acedf9 | Peter Maydell | { |
1910 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1911 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1912 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) |
1913 | 02acedf9 | Peter Maydell | | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) |
1914 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1915 | 02acedf9 | Peter Maydell | | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); |
1916 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) |
1917 | 02acedf9 | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) |
1918 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) |
1919 | 02acedf9 | Peter Maydell | | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
1920 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1921 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1922 | 02acedf9 | Peter Maydell | } |
1923 | 02acedf9 | Peter Maydell | |
1924 | 2a3f75b4 | Peter Maydell | void HELPER(neon_unzip16)(uint32_t rd, uint32_t rm)
|
1925 | 02acedf9 | Peter Maydell | { |
1926 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1927 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1928 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) |
1929 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); |
1930 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) |
1931 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
1932 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1933 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1934 | 02acedf9 | Peter Maydell | } |
1935 | d68a6f3a | Peter Maydell | |
1936 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qzip8)(uint32_t rd, uint32_t rm)
|
1937 | d68a6f3a | Peter Maydell | { |
1938 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1939 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1940 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1941 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1942 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) |
1943 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) |
1944 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) |
1945 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); |
1946 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) |
1947 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) |
1948 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) |
1949 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); |
1950 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) |
1951 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) |
1952 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1953 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); |
1954 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) |
1955 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) |
1956 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) |
1957 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1958 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1959 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1960 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1961 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1962 | d68a6f3a | Peter Maydell | } |
1963 | d68a6f3a | Peter Maydell | |
1964 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qzip16)(uint32_t rd, uint32_t rm)
|
1965 | d68a6f3a | Peter Maydell | { |
1966 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1967 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1968 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1969 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1970 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) |
1971 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); |
1972 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) |
1973 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); |
1974 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) |
1975 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); |
1976 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) |
1977 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1978 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1979 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1980 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1981 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1982 | d68a6f3a | Peter Maydell | } |
1983 | d68a6f3a | Peter Maydell | |
1984 | 2a3f75b4 | Peter Maydell | void HELPER(neon_qzip32)(uint32_t rd, uint32_t rm)
|
1985 | d68a6f3a | Peter Maydell | { |
1986 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1987 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1988 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1989 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1990 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); |
1991 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); |
1992 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1993 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1994 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1995 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1996 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1997 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1998 | d68a6f3a | Peter Maydell | } |
1999 | d68a6f3a | Peter Maydell | |
2000 | 2a3f75b4 | Peter Maydell | void HELPER(neon_zip8)(uint32_t rd, uint32_t rm)
|
2001 | d68a6f3a | Peter Maydell | { |
2002 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
2003 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
2004 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) |
2005 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) |
2006 | d68a6f3a | Peter Maydell | | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
2007 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); |
2008 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) |
2009 | d68a6f3a | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) |
2010 | d68a6f3a | Peter Maydell | | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) |
2011 | d68a6f3a | Peter Maydell | | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
2012 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2013 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2014 | d68a6f3a | Peter Maydell | } |
2015 | d68a6f3a | Peter Maydell | |
2016 | 2a3f75b4 | Peter Maydell | void HELPER(neon_zip16)(uint32_t rd, uint32_t rm)
|
2017 | d68a6f3a | Peter Maydell | { |
2018 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
2019 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
2020 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) |
2021 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); |
2022 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) |
2023 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
2024 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2025 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2026 | d68a6f3a | Peter Maydell | } |