root / target-arm / neon_helper.c @ 60e1b2a6
History | View | Annotate | Download (52.8 kB)
1 | e677137d | pbrook | /*
|
---|---|---|---|
2 | e677137d | pbrook | * ARM NEON vector operations.
|
3 | e677137d | pbrook | *
|
4 | e677137d | pbrook | * Copyright (c) 2007, 2008 CodeSourcery.
|
5 | e677137d | pbrook | * Written by Paul Brook
|
6 | e677137d | pbrook | *
|
7 | 8e31bf38 | Matthew Fernandez | * This code is licensed under the GNU GPL v2.
|
8 | e677137d | pbrook | */
|
9 | ad69471c | pbrook | #include <stdlib.h> |
10 | ad69471c | pbrook | #include <stdio.h> |
11 | ad69471c | pbrook | |
12 | ad69471c | pbrook | #include "cpu.h" |
13 | 02da0b2d | Peter Maydell | #include "exec-all.h" |
14 | 7b59220e | Lluís | #include "helper.h" |
15 | ad69471c | pbrook | |
16 | ad69471c | pbrook | #define SIGNBIT (uint32_t)0x80000000 |
17 | ad69471c | pbrook | #define SIGNBIT64 ((uint64_t)1 << 63) |
18 | ad69471c | pbrook | |
/*
 * Record that a saturating operation saturated by setting the CPSR_Q bit
 * in the FPSCR word held in env->vfp.xregs[].  Expects a variable named
 * 'env' to be in scope at the point of use.
 *
 * The bit is OR-ed in: a plain assignment would overwrite every other
 * field stored in the same register word.
 */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
|
20 | ad69471c | pbrook | |
/*
 * Lane containers neon_<name>: 1, 2 or 4 members (v1..v4) of an element
 * type.  On big-endian hosts the members are declared in reverse order,
 * so that v1 sits at the highest address there and at the lowest address
 * otherwise.
 */
#define NEON_TYPE1(name, type) \
    typedef struct { type v1; } neon_##name;
#ifdef HOST_WORDS_BIGENDIAN
#define NEON_TYPE2(name, type) \
    typedef struct { type v2; type v1; } neon_##name;
#define NEON_TYPE4(name, type) \
    typedef struct { type v4; type v3; type v2; type v1; } neon_##name;
#else
#define NEON_TYPE2(name, type) \
    typedef struct { type v1; type v2; } neon_##name;
#define NEON_TYPE4(name, type) \
    typedef struct { type v1; type v2; type v3; type v4; } neon_##name;
#endif

/* Four byte lanes, two halfword lanes, or a single word lane. */
NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
|
67 | ad69471c | pbrook | |
/* Reinterpret the 32-bit word 'val' as a lane structure of type 'vtype'
   and store it in 'dest'.  The conversion goes through a union.  */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.word = (val); \
    dest = pun.lanes; \
} while(0)
77 | ad69471c | pbrook | |
/* Reinterpret the lane structure 'val' (of type 'vtype') as a 32-bit word
   and store it in 'dest'.  The conversion goes through a union.  */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.lanes = (val); \
    dest = pun.word; \
} while(0)
87 | ad69471c | pbrook | |
/* Apply the currently defined NEON_FN lane by lane to the first 1, 2 or 4
   members of vsrc1/vsrc2, writing vdest.  Each variant extends the
   previous one.  */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
98 | ad69471c | pbrook | |
/* Body shared by the two-operand helpers: unpack arg1/arg2 into lane
   structures, run NEON_DO<n>, then pack the result lanes back into a
   32-bit word and return it.  */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}

/* Define helper neon_<name> taking the two packed operands.  */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* As NEON_VOP, but the helper also receives 'env' as its first argument,
   so NEON_FN may refer to it (e.g. through SET_QC).  */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
119 | 02da0b2d | Peter Maydell | |
/* Pairwise operations: NEON_FN combines adjacent lanes of one operand.
   The low result lanes come from vsrc1, the high ones from vsrc2.  */
/* With 32-bit elements a segment holds just one element, so pairwise and
   elementwise operations coincide.  */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);

/* Define pairwise helper neon_<name>: unpack both operands, run
   NEON_PDO<n>, and return the packed result lanes.  */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
145 | ad69471c | pbrook | |
/* Unary operators.  Only vsrc1 is declared here, so the NEON_FN in effect
   must not expand its third argument (NEON_DO<n> still passes
   vsrc2.vN textually).  */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1; \
    vtype vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
157 | ad69471c | pbrook | |
158 | ad69471c | pbrook | |
159 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
160 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
161 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
162 | ad69471c | pbrook | SET_QC(); \ |
163 | ad69471c | pbrook | dest = ~0; \
|
164 | ad69471c | pbrook | } else { \
|
165 | ad69471c | pbrook | dest = tmp; \ |
166 | ad69471c | pbrook | }} while(0) |
167 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
168 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qadd_u8, neon_u8, 4)
|
169 | ad69471c | pbrook | #undef NEON_FN
|
170 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
171 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qadd_u16, neon_u16, 2)
|
172 | ad69471c | pbrook | #undef NEON_FN
|
173 | ad69471c | pbrook | #undef NEON_USAT
|
174 | ad69471c | pbrook | |
175 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b) |
176 | 72902672 | Christophe Lyon | { |
177 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
178 | 72902672 | Christophe Lyon | if (res < a) {
|
179 | 72902672 | Christophe Lyon | SET_QC(); |
180 | 72902672 | Christophe Lyon | res = ~0;
|
181 | 72902672 | Christophe Lyon | } |
182 | 72902672 | Christophe Lyon | return res;
|
183 | 72902672 | Christophe Lyon | } |
184 | 72902672 | Christophe Lyon | |
185 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
186 | 72902672 | Christophe Lyon | { |
187 | 72902672 | Christophe Lyon | uint64_t res; |
188 | 72902672 | Christophe Lyon | |
189 | 72902672 | Christophe Lyon | res = src1 + src2; |
190 | 72902672 | Christophe Lyon | if (res < src1) {
|
191 | 72902672 | Christophe Lyon | SET_QC(); |
192 | 72902672 | Christophe Lyon | res = ~(uint64_t)0;
|
193 | 72902672 | Christophe Lyon | } |
194 | 72902672 | Christophe Lyon | return res;
|
195 | 72902672 | Christophe Lyon | } |
196 | 72902672 | Christophe Lyon | |
197 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
198 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
199 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
200 | ad69471c | pbrook | SET_QC(); \ |
201 | ad69471c | pbrook | if (src2 > 0) { \ |
202 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
203 | ad69471c | pbrook | } else { \
|
204 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
205 | ad69471c | pbrook | } \ |
206 | ad69471c | pbrook | } \ |
207 | ad69471c | pbrook | dest = tmp; \ |
208 | ad69471c | pbrook | } while(0) |
209 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
210 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qadd_s8, neon_s8, 4)
|
211 | ad69471c | pbrook | #undef NEON_FN
|
212 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
213 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qadd_s16, neon_s16, 2)
|
214 | ad69471c | pbrook | #undef NEON_FN
|
215 | ad69471c | pbrook | #undef NEON_SSAT
|
216 | ad69471c | pbrook | |
217 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b) |
218 | 72902672 | Christophe Lyon | { |
219 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
220 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
|
221 | 72902672 | Christophe Lyon | SET_QC(); |
222 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
223 | 72902672 | Christophe Lyon | } |
224 | 72902672 | Christophe Lyon | return res;
|
225 | 72902672 | Christophe Lyon | } |
226 | 72902672 | Christophe Lyon | |
227 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
228 | 72902672 | Christophe Lyon | { |
229 | 72902672 | Christophe Lyon | uint64_t res; |
230 | 72902672 | Christophe Lyon | |
231 | 72902672 | Christophe Lyon | res = src1 + src2; |
232 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
|
233 | 72902672 | Christophe Lyon | SET_QC(); |
234 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
235 | 72902672 | Christophe Lyon | } |
236 | 72902672 | Christophe Lyon | return res;
|
237 | 72902672 | Christophe Lyon | } |
238 | 72902672 | Christophe Lyon | |
239 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
240 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
241 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
242 | ad69471c | pbrook | SET_QC(); \ |
243 | ad69471c | pbrook | dest = 0; \
|
244 | ad69471c | pbrook | } else { \
|
245 | ad69471c | pbrook | dest = tmp; \ |
246 | ad69471c | pbrook | }} while(0) |
247 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
248 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qsub_u8, neon_u8, 4)
|
249 | ad69471c | pbrook | #undef NEON_FN
|
250 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
251 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qsub_u16, neon_u16, 2)
|
252 | ad69471c | pbrook | #undef NEON_FN
|
253 | ad69471c | pbrook | #undef NEON_USAT
|
254 | ad69471c | pbrook | |
255 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b) |
256 | 72902672 | Christophe Lyon | { |
257 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
258 | 72902672 | Christophe Lyon | if (res > a) {
|
259 | 72902672 | Christophe Lyon | SET_QC(); |
260 | 72902672 | Christophe Lyon | res = 0;
|
261 | 72902672 | Christophe Lyon | } |
262 | 72902672 | Christophe Lyon | return res;
|
263 | 72902672 | Christophe Lyon | } |
264 | 72902672 | Christophe Lyon | |
265 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
266 | 72902672 | Christophe Lyon | { |
267 | 72902672 | Christophe Lyon | uint64_t res; |
268 | 72902672 | Christophe Lyon | |
269 | 72902672 | Christophe Lyon | if (src1 < src2) {
|
270 | 72902672 | Christophe Lyon | SET_QC(); |
271 | 72902672 | Christophe Lyon | res = 0;
|
272 | 72902672 | Christophe Lyon | } else {
|
273 | 72902672 | Christophe Lyon | res = src1 - src2; |
274 | 72902672 | Christophe Lyon | } |
275 | 72902672 | Christophe Lyon | return res;
|
276 | 72902672 | Christophe Lyon | } |
277 | 72902672 | Christophe Lyon | |
278 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
279 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
280 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
281 | ad69471c | pbrook | SET_QC(); \ |
282 | ad69471c | pbrook | if (src2 < 0) { \ |
283 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
284 | ad69471c | pbrook | } else { \
|
285 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
286 | ad69471c | pbrook | } \ |
287 | ad69471c | pbrook | } \ |
288 | ad69471c | pbrook | dest = tmp; \ |
289 | ad69471c | pbrook | } while(0) |
290 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
291 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qsub_s8, neon_s8, 4)
|
292 | ad69471c | pbrook | #undef NEON_FN
|
293 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
294 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qsub_s16, neon_s16, 2)
|
295 | ad69471c | pbrook | #undef NEON_FN
|
296 | ad69471c | pbrook | #undef NEON_SSAT
|
297 | ad69471c | pbrook | |
298 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b) |
299 | 72902672 | Christophe Lyon | { |
300 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
301 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
|
302 | 72902672 | Christophe Lyon | SET_QC(); |
303 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
304 | 72902672 | Christophe Lyon | } |
305 | 72902672 | Christophe Lyon | return res;
|
306 | 72902672 | Christophe Lyon | } |
307 | 72902672 | Christophe Lyon | |
308 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
309 | 72902672 | Christophe Lyon | { |
310 | 72902672 | Christophe Lyon | uint64_t res; |
311 | 72902672 | Christophe Lyon | |
312 | 72902672 | Christophe Lyon | res = src1 - src2; |
313 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
|
314 | 72902672 | Christophe Lyon | SET_QC(); |
315 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
316 | 72902672 | Christophe Lyon | } |
317 | 72902672 | Christophe Lyon | return res;
|
318 | 72902672 | Christophe Lyon | } |
319 | 72902672 | Christophe Lyon | |
320 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 |
321 | ad69471c | pbrook | NEON_VOP(hadd_s8, neon_s8, 4)
|
322 | ad69471c | pbrook | NEON_VOP(hadd_u8, neon_u8, 4)
|
323 | ad69471c | pbrook | NEON_VOP(hadd_s16, neon_s16, 2)
|
324 | ad69471c | pbrook | NEON_VOP(hadd_u16, neon_u16, 2)
|
325 | ad69471c | pbrook | #undef NEON_FN
|
326 | ad69471c | pbrook | |
327 | ad69471c | pbrook | int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) |
328 | ad69471c | pbrook | { |
329 | ad69471c | pbrook | int32_t dest; |
330 | ad69471c | pbrook | |
331 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
332 | ad69471c | pbrook | if (src1 & src2 & 1) |
333 | ad69471c | pbrook | dest++; |
334 | ad69471c | pbrook | return dest;
|
335 | ad69471c | pbrook | } |
336 | ad69471c | pbrook | |
337 | ad69471c | pbrook | uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) |
338 | ad69471c | pbrook | { |
339 | ad69471c | pbrook | uint32_t dest; |
340 | ad69471c | pbrook | |
341 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
342 | ad69471c | pbrook | if (src1 & src2 & 1) |
343 | ad69471c | pbrook | dest++; |
344 | ad69471c | pbrook | return dest;
|
345 | ad69471c | pbrook | } |
346 | ad69471c | pbrook | |
347 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 |
348 | ad69471c | pbrook | NEON_VOP(rhadd_s8, neon_s8, 4)
|
349 | ad69471c | pbrook | NEON_VOP(rhadd_u8, neon_u8, 4)
|
350 | ad69471c | pbrook | NEON_VOP(rhadd_s16, neon_s16, 2)
|
351 | ad69471c | pbrook | NEON_VOP(rhadd_u16, neon_u16, 2)
|
352 | ad69471c | pbrook | #undef NEON_FN
|
353 | ad69471c | pbrook | |
354 | ad69471c | pbrook | int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) |
355 | ad69471c | pbrook | { |
356 | ad69471c | pbrook | int32_t dest; |
357 | ad69471c | pbrook | |
358 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
359 | ad69471c | pbrook | if ((src1 | src2) & 1) |
360 | ad69471c | pbrook | dest++; |
361 | ad69471c | pbrook | return dest;
|
362 | ad69471c | pbrook | } |
363 | ad69471c | pbrook | |
364 | ad69471c | pbrook | uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) |
365 | ad69471c | pbrook | { |
366 | ad69471c | pbrook | uint32_t dest; |
367 | ad69471c | pbrook | |
368 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
369 | ad69471c | pbrook | if ((src1 | src2) & 1) |
370 | ad69471c | pbrook | dest++; |
371 | ad69471c | pbrook | return dest;
|
372 | ad69471c | pbrook | } |
373 | ad69471c | pbrook | |
374 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 |
375 | ad69471c | pbrook | NEON_VOP(hsub_s8, neon_s8, 4)
|
376 | ad69471c | pbrook | NEON_VOP(hsub_u8, neon_u8, 4)
|
377 | ad69471c | pbrook | NEON_VOP(hsub_s16, neon_s16, 2)
|
378 | ad69471c | pbrook | NEON_VOP(hsub_u16, neon_u16, 2)
|
379 | ad69471c | pbrook | #undef NEON_FN
|
380 | ad69471c | pbrook | |
381 | ad69471c | pbrook | int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) |
382 | ad69471c | pbrook | { |
383 | ad69471c | pbrook | int32_t dest; |
384 | ad69471c | pbrook | |
385 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
386 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
387 | ad69471c | pbrook | dest--; |
388 | ad69471c | pbrook | return dest;
|
389 | ad69471c | pbrook | } |
390 | ad69471c | pbrook | |
391 | ad69471c | pbrook | uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) |
392 | ad69471c | pbrook | { |
393 | ad69471c | pbrook | uint32_t dest; |
394 | ad69471c | pbrook | |
395 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
396 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
397 | ad69471c | pbrook | dest--; |
398 | ad69471c | pbrook | return dest;
|
399 | ad69471c | pbrook | } |
400 | ad69471c | pbrook | |
401 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 |
402 | ad69471c | pbrook | NEON_VOP(cgt_s8, neon_s8, 4)
|
403 | ad69471c | pbrook | NEON_VOP(cgt_u8, neon_u8, 4)
|
404 | ad69471c | pbrook | NEON_VOP(cgt_s16, neon_s16, 2)
|
405 | ad69471c | pbrook | NEON_VOP(cgt_u16, neon_u16, 2)
|
406 | ad69471c | pbrook | NEON_VOP(cgt_s32, neon_s32, 1)
|
407 | ad69471c | pbrook | NEON_VOP(cgt_u32, neon_u32, 1)
|
408 | ad69471c | pbrook | #undef NEON_FN
|
409 | ad69471c | pbrook | |
410 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 |
411 | ad69471c | pbrook | NEON_VOP(cge_s8, neon_s8, 4)
|
412 | ad69471c | pbrook | NEON_VOP(cge_u8, neon_u8, 4)
|
413 | ad69471c | pbrook | NEON_VOP(cge_s16, neon_s16, 2)
|
414 | ad69471c | pbrook | NEON_VOP(cge_u16, neon_u16, 2)
|
415 | ad69471c | pbrook | NEON_VOP(cge_s32, neon_s32, 1)
|
416 | ad69471c | pbrook | NEON_VOP(cge_u32, neon_u32, 1)
|
417 | ad69471c | pbrook | #undef NEON_FN
|
418 | ad69471c | pbrook | |
419 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
|
420 | ad69471c | pbrook | NEON_VOP(min_s8, neon_s8, 4)
|
421 | ad69471c | pbrook | NEON_VOP(min_u8, neon_u8, 4)
|
422 | ad69471c | pbrook | NEON_VOP(min_s16, neon_s16, 2)
|
423 | ad69471c | pbrook | NEON_VOP(min_u16, neon_u16, 2)
|
424 | ad69471c | pbrook | NEON_VOP(min_s32, neon_s32, 1)
|
425 | ad69471c | pbrook | NEON_VOP(min_u32, neon_u32, 1)
|
426 | ad69471c | pbrook | NEON_POP(pmin_s8, neon_s8, 4)
|
427 | ad69471c | pbrook | NEON_POP(pmin_u8, neon_u8, 4)
|
428 | ad69471c | pbrook | NEON_POP(pmin_s16, neon_s16, 2)
|
429 | ad69471c | pbrook | NEON_POP(pmin_u16, neon_u16, 2)
|
430 | ad69471c | pbrook | #undef NEON_FN
|
431 | ad69471c | pbrook | |
432 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
|
433 | ad69471c | pbrook | NEON_VOP(max_s8, neon_s8, 4)
|
434 | ad69471c | pbrook | NEON_VOP(max_u8, neon_u8, 4)
|
435 | ad69471c | pbrook | NEON_VOP(max_s16, neon_s16, 2)
|
436 | ad69471c | pbrook | NEON_VOP(max_u16, neon_u16, 2)
|
437 | ad69471c | pbrook | NEON_VOP(max_s32, neon_s32, 1)
|
438 | ad69471c | pbrook | NEON_VOP(max_u32, neon_u32, 1)
|
439 | ad69471c | pbrook | NEON_POP(pmax_s8, neon_s8, 4)
|
440 | ad69471c | pbrook | NEON_POP(pmax_u8, neon_u8, 4)
|
441 | ad69471c | pbrook | NEON_POP(pmax_s16, neon_s16, 2)
|
442 | ad69471c | pbrook | NEON_POP(pmax_u16, neon_u16, 2)
|
443 | ad69471c | pbrook | #undef NEON_FN
|
444 | ad69471c | pbrook | |
445 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) \
|
446 | ad69471c | pbrook | dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) |
447 | ad69471c | pbrook | NEON_VOP(abd_s8, neon_s8, 4)
|
448 | ad69471c | pbrook | NEON_VOP(abd_u8, neon_u8, 4)
|
449 | ad69471c | pbrook | NEON_VOP(abd_s16, neon_s16, 2)
|
450 | ad69471c | pbrook | NEON_VOP(abd_u16, neon_u16, 2)
|
451 | ad69471c | pbrook | NEON_VOP(abd_s32, neon_s32, 1)
|
452 | ad69471c | pbrook | NEON_VOP(abd_u32, neon_u32, 1)
|
453 | ad69471c | pbrook | #undef NEON_FN
|
454 | ad69471c | pbrook | |
455 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
456 | ad69471c | pbrook | int8_t tmp; \ |
457 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
458 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
459 | 50f67e95 | Juha Riihimäki | tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
460 | ad69471c | pbrook | dest = 0; \
|
461 | ad69471c | pbrook | } else if (tmp < 0) { \ |
462 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
463 | ad69471c | pbrook | } else { \
|
464 | ad69471c | pbrook | dest = src1 << tmp; \ |
465 | ad69471c | pbrook | }} while (0) |
466 | ad69471c | pbrook | NEON_VOP(shl_u8, neon_u8, 4)
|
467 | ad69471c | pbrook | NEON_VOP(shl_u16, neon_u16, 2)
|
468 | ad69471c | pbrook | NEON_VOP(shl_u32, neon_u32, 1)
|
469 | ad69471c | pbrook | #undef NEON_FN
|
470 | ad69471c | pbrook | |
471 | ad69471c | pbrook | uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) |
472 | ad69471c | pbrook | { |
473 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
474 | ad69471c | pbrook | if (shift >= 64 || shift <= -64) { |
475 | ad69471c | pbrook | val = 0;
|
476 | ad69471c | pbrook | } else if (shift < 0) { |
477 | ad69471c | pbrook | val >>= -shift; |
478 | ad69471c | pbrook | } else {
|
479 | ad69471c | pbrook | val <<= shift; |
480 | ad69471c | pbrook | } |
481 | ad69471c | pbrook | return val;
|
482 | ad69471c | pbrook | } |
483 | ad69471c | pbrook | |
484 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
485 | ad69471c | pbrook | int8_t tmp; \ |
486 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
487 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
488 | ad69471c | pbrook | dest = 0; \
|
489 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
490 | ad69471c | pbrook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
491 | ad69471c | pbrook | } else if (tmp < 0) { \ |
492 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
493 | ad69471c | pbrook | } else { \
|
494 | ad69471c | pbrook | dest = src1 << tmp; \ |
495 | ad69471c | pbrook | }} while (0) |
496 | ad69471c | pbrook | NEON_VOP(shl_s8, neon_s8, 4)
|
497 | ad69471c | pbrook | NEON_VOP(shl_s16, neon_s16, 2)
|
498 | ad69471c | pbrook | NEON_VOP(shl_s32, neon_s32, 1)
|
499 | ad69471c | pbrook | #undef NEON_FN
|
500 | ad69471c | pbrook | |
501 | ad69471c | pbrook | uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) |
502 | ad69471c | pbrook | { |
503 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
504 | ad69471c | pbrook | int64_t val = valop; |
505 | ad69471c | pbrook | if (shift >= 64) { |
506 | ad69471c | pbrook | val = 0;
|
507 | ad69471c | pbrook | } else if (shift <= -64) { |
508 | ad69471c | pbrook | val >>= 63;
|
509 | ad69471c | pbrook | } else if (shift < 0) { |
510 | ad69471c | pbrook | val >>= -shift; |
511 | ad69471c | pbrook | } else {
|
512 | ad69471c | pbrook | val <<= shift; |
513 | ad69471c | pbrook | } |
514 | ad69471c | pbrook | return val;
|
515 | ad69471c | pbrook | } |
516 | ad69471c | pbrook | |
517 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
518 | ad69471c | pbrook | int8_t tmp; \ |
519 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
520 | 0670a7b6 | Peter Maydell | if ((tmp >= (ssize_t)sizeof(src1) * 8) \ |
521 | 0670a7b6 | Peter Maydell | || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ |
522 | ad69471c | pbrook | dest = 0; \
|
523 | ad69471c | pbrook | } else if (tmp < 0) { \ |
524 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
525 | ad69471c | pbrook | } else { \
|
526 | ad69471c | pbrook | dest = src1 << tmp; \ |
527 | ad69471c | pbrook | }} while (0) |
528 | ad69471c | pbrook | NEON_VOP(rshl_s8, neon_s8, 4)
|
529 | ad69471c | pbrook | NEON_VOP(rshl_s16, neon_s16, 2)
|
530 | ad69471c | pbrook | #undef NEON_FN
|
531 | ad69471c | pbrook | |
532 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
533 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
534 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) |
535 | 4bd4ee07 | Christophe Lyon | { |
536 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
537 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
538 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
539 | 4bd4ee07 | Christophe Lyon | if ((shift >= 32) || (shift <= -32)) { |
540 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
541 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
542 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
543 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
544 | 4bd4ee07 | Christophe Lyon | } else {
|
545 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
546 | 4bd4ee07 | Christophe Lyon | } |
547 | 4bd4ee07 | Christophe Lyon | return dest;
|
548 | 4bd4ee07 | Christophe Lyon | } |
549 | 4bd4ee07 | Christophe Lyon | |
550 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
551 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
552 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
553 | ad69471c | pbrook | { |
554 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
555 | ad69471c | pbrook | int64_t val = valop; |
556 | 0670a7b6 | Peter Maydell | if ((shift >= 64) || (shift <= -64)) { |
557 | ad69471c | pbrook | val = 0;
|
558 | ad69471c | pbrook | } else if (shift < 0) { |
559 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
560 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
561 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
562 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
563 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
564 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000LL;
|
565 | 4bd4ee07 | Christophe Lyon | } else {
|
566 | 4bd4ee07 | Christophe Lyon | val++; |
567 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
568 | 4bd4ee07 | Christophe Lyon | } |
569 | ad69471c | pbrook | } else {
|
570 | ad69471c | pbrook | val <<= shift; |
571 | ad69471c | pbrook | } |
572 | ad69471c | pbrook | return val;
|
573 | ad69471c | pbrook | } |
574 | ad69471c | pbrook | |
575 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
576 | ad69471c | pbrook | int8_t tmp; \ |
577 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
578 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
579 | 50f67e95 | Juha Riihimäki | tmp < -(ssize_t)sizeof(src1) * 8) { \ |
580 | ad69471c | pbrook | dest = 0; \
|
581 | 50f67e95 | Juha Riihimäki | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
582 | b6c63b98 | Christophe Lyon | dest = src1 >> (-tmp - 1); \
|
583 | ad69471c | pbrook | } else if (tmp < 0) { \ |
584 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
585 | ad69471c | pbrook | } else { \
|
586 | ad69471c | pbrook | dest = src1 << tmp; \ |
587 | ad69471c | pbrook | }} while (0) |
588 | ad69471c | pbrook | NEON_VOP(rshl_u8, neon_u8, 4)
|
589 | ad69471c | pbrook | NEON_VOP(rshl_u16, neon_u16, 2)
|
590 | ad69471c | pbrook | #undef NEON_FN
|
591 | ad69471c | pbrook | |
592 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
593 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
594 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) |
595 | 4bd4ee07 | Christophe Lyon | { |
596 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
597 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
598 | 4bd4ee07 | Christophe Lyon | if (shift >= 32 || shift < -32) { |
599 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
600 | 4bd4ee07 | Christophe Lyon | } else if (shift == -32) { |
601 | 4bd4ee07 | Christophe Lyon | dest = val >> 31;
|
602 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
603 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
604 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
605 | 4bd4ee07 | Christophe Lyon | } else {
|
606 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
607 | 4bd4ee07 | Christophe Lyon | } |
608 | 4bd4ee07 | Christophe Lyon | return dest;
|
609 | 4bd4ee07 | Christophe Lyon | } |
610 | 4bd4ee07 | Christophe Lyon | |
611 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
612 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
613 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
614 | ad69471c | pbrook | { |
615 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
616 | 51e3930f | Christophe Lyon | if (shift >= 64 || shift < -64) { |
617 | ad69471c | pbrook | val = 0;
|
618 | ad69471c | pbrook | } else if (shift == -64) { |
619 | ad69471c | pbrook | /* Rounding a 1-bit result just preserves that bit. */
|
620 | ad69471c | pbrook | val >>= 63;
|
621 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
622 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
623 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
624 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
625 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
626 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
627 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
628 | 4bd4ee07 | Christophe Lyon | } else {
|
629 | 4bd4ee07 | Christophe Lyon | val++; |
630 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
631 | 4bd4ee07 | Christophe Lyon | } |
632 | ad69471c | pbrook | } else {
|
633 | ad69471c | pbrook | val <<= shift; |
634 | ad69471c | pbrook | } |
635 | ad69471c | pbrook | return val;
|
636 | ad69471c | pbrook | } |
637 | ad69471c | pbrook | |
638 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
639 | ad69471c | pbrook | int8_t tmp; \ |
640 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
641 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
642 | ad69471c | pbrook | if (src1) { \
|
643 | ad69471c | pbrook | SET_QC(); \ |
644 | ad69471c | pbrook | dest = ~0; \
|
645 | ad69471c | pbrook | } else { \
|
646 | ad69471c | pbrook | dest = 0; \
|
647 | ad69471c | pbrook | } \ |
648 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
649 | ad69471c | pbrook | dest = 0; \
|
650 | ad69471c | pbrook | } else if (tmp < 0) { \ |
651 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
652 | ad69471c | pbrook | } else { \
|
653 | ad69471c | pbrook | dest = src1 << tmp; \ |
654 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
655 | ad69471c | pbrook | SET_QC(); \ |
656 | ad69471c | pbrook | dest = ~0; \
|
657 | ad69471c | pbrook | } \ |
658 | ad69471c | pbrook | }} while (0) |
659 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_u8, neon_u8, 4)
|
660 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_u16, neon_u16, 2)
|
661 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_u32, neon_u32, 1)
|
662 | ad69471c | pbrook | #undef NEON_FN
|
663 | ad69471c | pbrook | |
664 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
665 | ad69471c | pbrook | { |
666 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
667 | ad69471c | pbrook | if (shift >= 64) { |
668 | ad69471c | pbrook | if (val) {
|
669 | ad69471c | pbrook | val = ~(uint64_t)0;
|
670 | ad69471c | pbrook | SET_QC(); |
671 | ad69471c | pbrook | } |
672 | ad69471c | pbrook | } else if (shift <= -64) { |
673 | ad69471c | pbrook | val = 0;
|
674 | ad69471c | pbrook | } else if (shift < 0) { |
675 | ad69471c | pbrook | val >>= -shift; |
676 | ad69471c | pbrook | } else {
|
677 | ad69471c | pbrook | uint64_t tmp = val; |
678 | ad69471c | pbrook | val <<= shift; |
679 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
680 | ad69471c | pbrook | SET_QC(); |
681 | ad69471c | pbrook | val = ~(uint64_t)0;
|
682 | ad69471c | pbrook | } |
683 | ad69471c | pbrook | } |
684 | ad69471c | pbrook | return val;
|
685 | ad69471c | pbrook | } |
686 | ad69471c | pbrook | |
687 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
688 | ad69471c | pbrook | int8_t tmp; \ |
689 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
690 | 50f67e95 | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
691 | a5d88f3e | Peter Maydell | if (src1) { \
|
692 | ad69471c | pbrook | SET_QC(); \ |
693 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
694 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
695 | a5d88f3e | Peter Maydell | dest--; \ |
696 | a5d88f3e | Peter Maydell | } \ |
697 | a5d88f3e | Peter Maydell | } else { \
|
698 | a5d88f3e | Peter Maydell | dest = src1; \ |
699 | a5d88f3e | Peter Maydell | } \ |
700 | 50f67e95 | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
701 | ad69471c | pbrook | dest = src1 >> 31; \
|
702 | ad69471c | pbrook | } else if (tmp < 0) { \ |
703 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
704 | ad69471c | pbrook | } else { \
|
705 | ad69471c | pbrook | dest = src1 << tmp; \ |
706 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
707 | ad69471c | pbrook | SET_QC(); \ |
708 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
709 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
710 | a5d88f3e | Peter Maydell | dest--; \ |
711 | a5d88f3e | Peter Maydell | } \ |
712 | ad69471c | pbrook | } \ |
713 | ad69471c | pbrook | }} while (0) |
714 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_s8, neon_s8, 4)
|
715 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_s16, neon_s16, 2)
|
716 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshl_s32, neon_s32, 1)
|
717 | ad69471c | pbrook | #undef NEON_FN
|
718 | ad69471c | pbrook | |
719 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
720 | ad69471c | pbrook | { |
721 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
722 | ad69471c | pbrook | int64_t val = valop; |
723 | ad69471c | pbrook | if (shift >= 64) { |
724 | ad69471c | pbrook | if (val) {
|
725 | ad69471c | pbrook | SET_QC(); |
726 | eb7a3d79 | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
727 | ad69471c | pbrook | } |
728 | 4c9b70ae | Juha Riihimäki | } else if (shift <= -64) { |
729 | ad69471c | pbrook | val >>= 63;
|
730 | ad69471c | pbrook | } else if (shift < 0) { |
731 | ad69471c | pbrook | val >>= -shift; |
732 | ad69471c | pbrook | } else {
|
733 | ad69471c | pbrook | int64_t tmp = val; |
734 | ad69471c | pbrook | val <<= shift; |
735 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
736 | ad69471c | pbrook | SET_QC(); |
737 | ad69471c | pbrook | val = (tmp >> 63) ^ ~SIGNBIT64;
|
738 | ad69471c | pbrook | } |
739 | ad69471c | pbrook | } |
740 | ad69471c | pbrook | return val;
|
741 | ad69471c | pbrook | } |
742 | ad69471c | pbrook | |
743 | 4ca4502c | Juha Riihimäki | #define NEON_FN(dest, src1, src2) do { \ |
744 | 4ca4502c | Juha Riihimäki | if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ |
745 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
746 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
747 | 4ca4502c | Juha Riihimäki | } else { \
|
748 | 4ca4502c | Juha Riihimäki | int8_t tmp; \ |
749 | 4ca4502c | Juha Riihimäki | tmp = (int8_t)src2; \ |
750 | 4ca4502c | Juha Riihimäki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
751 | 4ca4502c | Juha Riihimäki | if (src1) { \
|
752 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
753 | 4ca4502c | Juha Riihimäki | dest = ~0; \
|
754 | 4ca4502c | Juha Riihimäki | } else { \
|
755 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
756 | 4ca4502c | Juha Riihimäki | } \ |
757 | 4ca4502c | Juha Riihimäki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
758 | 4ca4502c | Juha Riihimäki | dest = 0; \
|
759 | 4ca4502c | Juha Riihimäki | } else if (tmp < 0) { \ |
760 | 4ca4502c | Juha Riihimäki | dest = src1 >> -tmp; \ |
761 | 4ca4502c | Juha Riihimäki | } else { \
|
762 | 4ca4502c | Juha Riihimäki | dest = src1 << tmp; \ |
763 | 4ca4502c | Juha Riihimäki | if ((dest >> tmp) != src1) { \
|
764 | 4ca4502c | Juha Riihimäki | SET_QC(); \ |
765 | 4ca4502c | Juha Riihimäki | dest = ~0; \
|
766 | 4ca4502c | Juha Riihimäki | } \ |
767 | 4ca4502c | Juha Riihimäki | } \ |
768 | 4ca4502c | Juha Riihimäki | }} while (0) |
769 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
|
770 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
|
771 | 4ca4502c | Juha Riihimäki | #undef NEON_FN
|
772 | 4ca4502c | Juha Riihimäki | |
773 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qshlu_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
774 | 4ca4502c | Juha Riihimäki | { |
775 | 4ca4502c | Juha Riihimäki | if ((int32_t)valop < 0) { |
776 | 4ca4502c | Juha Riihimäki | SET_QC(); |
777 | 4ca4502c | Juha Riihimäki | return 0; |
778 | 4ca4502c | Juha Riihimäki | } |
779 | 02da0b2d | Peter Maydell | return helper_neon_qshl_u32(env, valop, shiftop);
|
780 | 4ca4502c | Juha Riihimäki | } |
781 | 4ca4502c | Juha Riihimäki | |
782 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
783 | 4ca4502c | Juha Riihimäki | { |
784 | 4ca4502c | Juha Riihimäki | if ((int64_t)valop < 0) { |
785 | 4ca4502c | Juha Riihimäki | SET_QC(); |
786 | 4ca4502c | Juha Riihimäki | return 0; |
787 | 4ca4502c | Juha Riihimäki | } |
788 | 02da0b2d | Peter Maydell | return helper_neon_qshl_u64(env, valop, shiftop);
|
789 | 4ca4502c | Juha Riihimäki | } |
790 | ad69471c | pbrook | |
791 | ad69471c | pbrook | /* FIXME: This is wrong. */
|
792 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
793 | ad69471c | pbrook | int8_t tmp; \ |
794 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
795 | 33ebc293 | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
796 | 33ebc293 | Peter Maydell | if (src1) { \
|
797 | 33ebc293 | Peter Maydell | SET_QC(); \ |
798 | 33ebc293 | Peter Maydell | dest = ~0; \
|
799 | 33ebc293 | Peter Maydell | } else { \
|
800 | 33ebc293 | Peter Maydell | dest = 0; \
|
801 | 33ebc293 | Peter Maydell | } \ |
802 | 33ebc293 | Peter Maydell | } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ |
803 | 33ebc293 | Peter Maydell | dest = 0; \
|
804 | 33ebc293 | Peter Maydell | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
805 | 33ebc293 | Peter Maydell | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
806 | 33ebc293 | Peter Maydell | } else if (tmp < 0) { \ |
807 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
808 | ad69471c | pbrook | } else { \
|
809 | ad69471c | pbrook | dest = src1 << tmp; \ |
810 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
811 | ad69471c | pbrook | SET_QC(); \ |
812 | ad69471c | pbrook | dest = ~0; \
|
813 | ad69471c | pbrook | } \ |
814 | ad69471c | pbrook | }} while (0) |
815 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
|
816 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
|
817 | ad69471c | pbrook | #undef NEON_FN
|
818 | ad69471c | pbrook | |
819 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
820 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
821 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop) |
822 | 4bd4ee07 | Christophe Lyon | { |
823 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
824 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
825 | 33ebc293 | Peter Maydell | if (shift >= 32) { |
826 | 33ebc293 | Peter Maydell | if (val) {
|
827 | 33ebc293 | Peter Maydell | SET_QC(); |
828 | 33ebc293 | Peter Maydell | dest = ~0;
|
829 | 33ebc293 | Peter Maydell | } else {
|
830 | 33ebc293 | Peter Maydell | dest = 0;
|
831 | 33ebc293 | Peter Maydell | } |
832 | 33ebc293 | Peter Maydell | } else if (shift < -32) { |
833 | 33ebc293 | Peter Maydell | dest = 0;
|
834 | 33ebc293 | Peter Maydell | } else if (shift == -32) { |
835 | 33ebc293 | Peter Maydell | dest = val >> 31;
|
836 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
837 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
838 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
839 | 4bd4ee07 | Christophe Lyon | } else {
|
840 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
841 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
842 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
843 | 4bd4ee07 | Christophe Lyon | dest = ~0;
|
844 | 4bd4ee07 | Christophe Lyon | } |
845 | 4bd4ee07 | Christophe Lyon | } |
846 | 4bd4ee07 | Christophe Lyon | return dest;
|
847 | 4bd4ee07 | Christophe Lyon | } |
848 | 4bd4ee07 | Christophe Lyon | |
849 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
850 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
851 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
852 | ad69471c | pbrook | { |
853 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
854 | 33ebc293 | Peter Maydell | if (shift >= 64) { |
855 | 33ebc293 | Peter Maydell | if (val) {
|
856 | 33ebc293 | Peter Maydell | SET_QC(); |
857 | 33ebc293 | Peter Maydell | val = ~0;
|
858 | 33ebc293 | Peter Maydell | } |
859 | 33ebc293 | Peter Maydell | } else if (shift < -64) { |
860 | 33ebc293 | Peter Maydell | val = 0;
|
861 | 33ebc293 | Peter Maydell | } else if (shift == -64) { |
862 | 33ebc293 | Peter Maydell | val >>= 63;
|
863 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
864 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
865 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
866 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
867 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
868 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
869 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
870 | 4bd4ee07 | Christophe Lyon | } else {
|
871 | 4bd4ee07 | Christophe Lyon | val++; |
872 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
873 | 4bd4ee07 | Christophe Lyon | } |
874 | ad69471c | pbrook | } else { \
|
875 | ad69471c | pbrook | uint64_t tmp = val; |
876 | ad69471c | pbrook | val <<= shift; |
877 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
878 | ad69471c | pbrook | SET_QC(); |
879 | ad69471c | pbrook | val = ~0;
|
880 | ad69471c | pbrook | } |
881 | ad69471c | pbrook | } |
882 | ad69471c | pbrook | return val;
|
883 | ad69471c | pbrook | } |
884 | ad69471c | pbrook | |
885 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
886 | ad69471c | pbrook | int8_t tmp; \ |
887 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
888 | 7b6ecf5b | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
889 | 7b6ecf5b | Peter Maydell | if (src1) { \
|
890 | 7b6ecf5b | Peter Maydell | SET_QC(); \ |
891 | 7b6ecf5b | Peter Maydell | dest = (1 << (sizeof(src1) * 8 - 1)); \ |
892 | 7b6ecf5b | Peter Maydell | if (src1 > 0) { \ |
893 | 7b6ecf5b | Peter Maydell | dest--; \ |
894 | 7b6ecf5b | Peter Maydell | } \ |
895 | 7b6ecf5b | Peter Maydell | } else { \
|
896 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
897 | 7b6ecf5b | Peter Maydell | } \ |
898 | 7b6ecf5b | Peter Maydell | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
899 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
900 | 7b6ecf5b | Peter Maydell | } else if (tmp < 0) { \ |
901 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
902 | ad69471c | pbrook | } else { \
|
903 | ad69471c | pbrook | dest = src1 << tmp; \ |
904 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
905 | ad69471c | pbrook | SET_QC(); \ |
906 | 960e623b | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
907 | 960e623b | Peter Maydell | if (src1 > 0) { \ |
908 | 960e623b | Peter Maydell | dest--; \ |
909 | 960e623b | Peter Maydell | } \ |
910 | ad69471c | pbrook | } \ |
911 | ad69471c | pbrook | }} while (0) |
912 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
|
913 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
|
914 | ad69471c | pbrook | #undef NEON_FN
|
915 | ad69471c | pbrook | |
916 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
917 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
918 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
919 | 4bd4ee07 | Christophe Lyon | { |
920 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
921 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
922 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
923 | 7b6ecf5b | Peter Maydell | if (shift >= 32) { |
924 | 7b6ecf5b | Peter Maydell | if (val) {
|
925 | 7b6ecf5b | Peter Maydell | SET_QC(); |
926 | 7b6ecf5b | Peter Maydell | dest = (val >> 31) ^ ~SIGNBIT;
|
927 | 7b6ecf5b | Peter Maydell | } else {
|
928 | 7b6ecf5b | Peter Maydell | dest = 0;
|
929 | 7b6ecf5b | Peter Maydell | } |
930 | 7b6ecf5b | Peter Maydell | } else if (shift <= -32) { |
931 | 7b6ecf5b | Peter Maydell | dest = 0;
|
932 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
933 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
934 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
935 | 4bd4ee07 | Christophe Lyon | } else {
|
936 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
937 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
938 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
939 | 4bd4ee07 | Christophe Lyon | dest = (val >> 31) ^ ~SIGNBIT;
|
940 | 4bd4ee07 | Christophe Lyon | } |
941 | 4bd4ee07 | Christophe Lyon | } |
942 | 4bd4ee07 | Christophe Lyon | return dest;
|
943 | 4bd4ee07 | Christophe Lyon | } |
944 | 4bd4ee07 | Christophe Lyon | |
945 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
946 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
947 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
948 | ad69471c | pbrook | { |
949 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
950 | ad69471c | pbrook | int64_t val = valop; |
951 | ad69471c | pbrook | |
952 | 7b6ecf5b | Peter Maydell | if (shift >= 64) { |
953 | 7b6ecf5b | Peter Maydell | if (val) {
|
954 | 7b6ecf5b | Peter Maydell | SET_QC(); |
955 | 7b6ecf5b | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
956 | 7b6ecf5b | Peter Maydell | } |
957 | 7b6ecf5b | Peter Maydell | } else if (shift <= -64) { |
958 | 7b6ecf5b | Peter Maydell | val = 0;
|
959 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
960 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
961 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
962 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
963 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
964 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
965 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000ULL;
|
966 | 4bd4ee07 | Christophe Lyon | } else {
|
967 | 4bd4ee07 | Christophe Lyon | val++; |
968 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
969 | 4bd4ee07 | Christophe Lyon | } |
970 | ad69471c | pbrook | } else {
|
971 | 4bd4ee07 | Christophe Lyon | int64_t tmp = val; |
972 | ad69471c | pbrook | val <<= shift; |
973 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
974 | ad69471c | pbrook | SET_QC(); |
975 | 4bd4ee07 | Christophe Lyon | val = (tmp >> 63) ^ ~SIGNBIT64;
|
976 | ad69471c | pbrook | } |
977 | ad69471c | pbrook | } |
978 | ad69471c | pbrook | return val;
|
979 | ad69471c | pbrook | } |
980 | ad69471c | pbrook | |
981 | ad69471c | pbrook | uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |
982 | ad69471c | pbrook | { |
983 | ad69471c | pbrook | uint32_t mask; |
984 | ad69471c | pbrook | mask = (a ^ b) & 0x80808080u;
|
985 | ad69471c | pbrook | a &= ~0x80808080u;
|
986 | ad69471c | pbrook | b &= ~0x80808080u;
|
987 | ad69471c | pbrook | return (a + b) ^ mask;
|
988 | ad69471c | pbrook | } |
989 | ad69471c | pbrook | |
990 | ad69471c | pbrook | uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) |
991 | ad69471c | pbrook | { |
992 | ad69471c | pbrook | uint32_t mask; |
993 | ad69471c | pbrook | mask = (a ^ b) & 0x80008000u;
|
994 | ad69471c | pbrook | a &= ~0x80008000u;
|
995 | ad69471c | pbrook | b &= ~0x80008000u;
|
996 | ad69471c | pbrook | return (a + b) ^ mask;
|
997 | ad69471c | pbrook | } |
998 | ad69471c | pbrook | |
999 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 + src2
|
1000 | ad69471c | pbrook | NEON_POP(padd_u8, neon_u8, 4)
|
1001 | ad69471c | pbrook | NEON_POP(padd_u16, neon_u16, 2)
|
1002 | ad69471c | pbrook | #undef NEON_FN
|
1003 | ad69471c | pbrook | |
1004 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 - src2
|
1005 | ad69471c | pbrook | NEON_VOP(sub_u8, neon_u8, 4)
|
1006 | ad69471c | pbrook | NEON_VOP(sub_u16, neon_u16, 2)
|
1007 | ad69471c | pbrook | #undef NEON_FN
|
1008 | ad69471c | pbrook | |
1009 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 * src2
|
1010 | ad69471c | pbrook | NEON_VOP(mul_u8, neon_u8, 4)
|
1011 | ad69471c | pbrook | NEON_VOP(mul_u16, neon_u16, 2)
|
1012 | ad69471c | pbrook | #undef NEON_FN
|
1013 | ad69471c | pbrook | |
1014 | 1654b2d6 | aurel32 | /* Polynomial multiplication is like integer multiplication except the
|
1015 | ad69471c | pbrook | partial products are XORed, not added. */
|
1016 | ad69471c | pbrook | uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) |
1017 | ad69471c | pbrook | { |
1018 | ad69471c | pbrook | uint32_t mask; |
1019 | ad69471c | pbrook | uint32_t result; |
1020 | ad69471c | pbrook | result = 0;
|
1021 | ad69471c | pbrook | while (op1) {
|
1022 | ad69471c | pbrook | mask = 0;
|
1023 | ad69471c | pbrook | if (op1 & 1) |
1024 | ad69471c | pbrook | mask |= 0xff;
|
1025 | ad69471c | pbrook | if (op1 & (1 << 8)) |
1026 | ad69471c | pbrook | mask |= (0xff << 8); |
1027 | ad69471c | pbrook | if (op1 & (1 << 16)) |
1028 | ad69471c | pbrook | mask |= (0xff << 16); |
1029 | ad69471c | pbrook | if (op1 & (1 << 24)) |
1030 | ad69471c | pbrook | mask |= (0xff << 24); |
1031 | ad69471c | pbrook | result ^= op2 & mask; |
1032 | ad69471c | pbrook | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1033 | ad69471c | pbrook | op2 = (op2 << 1) & 0xfefefefe; |
1034 | ad69471c | pbrook | } |
1035 | ad69471c | pbrook | return result;
|
1036 | ad69471c | pbrook | } |
1037 | ad69471c | pbrook | |
1038 | e5ca24cb | Peter Maydell | uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) |
1039 | e5ca24cb | Peter Maydell | { |
1040 | e5ca24cb | Peter Maydell | uint64_t result = 0;
|
1041 | e5ca24cb | Peter Maydell | uint64_t mask; |
1042 | e5ca24cb | Peter Maydell | uint64_t op2ex = op2; |
1043 | e5ca24cb | Peter Maydell | op2ex = (op2ex & 0xff) |
|
1044 | e5ca24cb | Peter Maydell | ((op2ex & 0xff00) << 8) | |
1045 | e5ca24cb | Peter Maydell | ((op2ex & 0xff0000) << 16) | |
1046 | e5ca24cb | Peter Maydell | ((op2ex & 0xff000000) << 24); |
1047 | e5ca24cb | Peter Maydell | while (op1) {
|
1048 | e5ca24cb | Peter Maydell | mask = 0;
|
1049 | e5ca24cb | Peter Maydell | if (op1 & 1) { |
1050 | e5ca24cb | Peter Maydell | mask |= 0xffff;
|
1051 | e5ca24cb | Peter Maydell | } |
1052 | e5ca24cb | Peter Maydell | if (op1 & (1 << 8)) { |
1053 | e5ca24cb | Peter Maydell | mask |= (0xffffU << 16); |
1054 | e5ca24cb | Peter Maydell | } |
1055 | e5ca24cb | Peter Maydell | if (op1 & (1 << 16)) { |
1056 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 32); |
1057 | e5ca24cb | Peter Maydell | } |
1058 | e5ca24cb | Peter Maydell | if (op1 & (1 << 24)) { |
1059 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 48); |
1060 | e5ca24cb | Peter Maydell | } |
1061 | e5ca24cb | Peter Maydell | result ^= op2ex & mask; |
1062 | e5ca24cb | Peter Maydell | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1063 | e5ca24cb | Peter Maydell | op2ex <<= 1;
|
1064 | e5ca24cb | Peter Maydell | } |
1065 | e5ca24cb | Peter Maydell | return result;
|
1066 | e5ca24cb | Peter Maydell | } |
1067 | e5ca24cb | Peter Maydell | |
1068 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 |
1069 | ad69471c | pbrook | NEON_VOP(tst_u8, neon_u8, 4)
|
1070 | ad69471c | pbrook | NEON_VOP(tst_u16, neon_u16, 2)
|
1071 | ad69471c | pbrook | NEON_VOP(tst_u32, neon_u32, 1)
|
1072 | ad69471c | pbrook | #undef NEON_FN
|
1073 | ad69471c | pbrook | |
1074 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 |
1075 | ad69471c | pbrook | NEON_VOP(ceq_u8, neon_u8, 4)
|
1076 | ad69471c | pbrook | NEON_VOP(ceq_u16, neon_u16, 2)
|
1077 | ad69471c | pbrook | NEON_VOP(ceq_u32, neon_u32, 1)
|
1078 | ad69471c | pbrook | #undef NEON_FN
|
1079 | ad69471c | pbrook | |
1080 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src |
1081 | ad69471c | pbrook | NEON_VOP1(abs_s8, neon_s8, 4)
|
1082 | ad69471c | pbrook | NEON_VOP1(abs_s16, neon_s16, 2)
|
1083 | ad69471c | pbrook | #undef NEON_FN
|
1084 | ad69471c | pbrook | |
/* Count Leading Sign/Zero Bits. */

/* Number of leading zero bits in an 8-bit value (8 when x is zero). */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1093 | ad69471c | pbrook | |
/* Number of leading zero bits in a 16-bit value (16 when x is zero). */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1101 | ad69471c | pbrook | |
1102 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
|
1103 | ad69471c | pbrook | NEON_VOP1(clz_u8, neon_u8, 4)
|
1104 | ad69471c | pbrook | #undef NEON_FN
|
1105 | ad69471c | pbrook | |
1106 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
|
1107 | ad69471c | pbrook | NEON_VOP1(clz_u16, neon_u16, 2)
|
1108 | ad69471c | pbrook | #undef NEON_FN
|
1109 | ad69471c | pbrook | |
1110 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 |
1111 | ad69471c | pbrook | NEON_VOP1(cls_s8, neon_s8, 4)
|
1112 | ad69471c | pbrook | #undef NEON_FN
|
1113 | ad69471c | pbrook | |
1114 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 |
1115 | ad69471c | pbrook | NEON_VOP1(cls_s16, neon_s16, 2)
|
1116 | ad69471c | pbrook | #undef NEON_FN
|
1117 | ad69471c | pbrook | |
1118 | ad69471c | pbrook | uint32_t HELPER(neon_cls_s32)(uint32_t x) |
1119 | ad69471c | pbrook | { |
1120 | ad69471c | pbrook | int count;
|
1121 | ad69471c | pbrook | if ((int32_t)x < 0) |
1122 | ad69471c | pbrook | x = ~x; |
1123 | ad69471c | pbrook | for (count = 32; x; count--) |
1124 | ad69471c | pbrook | x = x >> 1;
|
1125 | ad69471c | pbrook | return count - 1; |
1126 | ad69471c | pbrook | } |
1127 | ad69471c | pbrook | |
1128 | ad69471c | pbrook | /* Bit count. */
|
1129 | ad69471c | pbrook | uint32_t HELPER(neon_cnt_u8)(uint32_t x) |
1130 | ad69471c | pbrook | { |
1131 | ad69471c | pbrook | x = (x & 0x55555555) + ((x >> 1) & 0x55555555); |
1132 | ad69471c | pbrook | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
1133 | ad69471c | pbrook | x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); |
1134 | ad69471c | pbrook | return x;
|
1135 | ad69471c | pbrook | } |
1136 | ad69471c | pbrook | |
1137 | ad69471c | pbrook | #define NEON_QDMULH16(dest, src1, src2, round) do { \ |
1138 | ad69471c | pbrook | uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ |
1139 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ |
1140 | ad69471c | pbrook | SET_QC(); \ |
1141 | ad69471c | pbrook | tmp = (tmp >> 31) ^ ~SIGNBIT; \
|
1142 | 46eece9d | Juha Riihimäki | } else { \
|
1143 | 46eece9d | Juha Riihimäki | tmp <<= 1; \
|
1144 | ad69471c | pbrook | } \ |
1145 | ad69471c | pbrook | if (round) { \
|
1146 | ad69471c | pbrook | int32_t old = tmp; \ |
1147 | ad69471c | pbrook | tmp += 1 << 15; \ |
1148 | ad69471c | pbrook | if ((int32_t)tmp < old) { \
|
1149 | ad69471c | pbrook | SET_QC(); \ |
1150 | ad69471c | pbrook | tmp = SIGNBIT - 1; \
|
1151 | ad69471c | pbrook | } \ |
1152 | ad69471c | pbrook | } \ |
1153 | ad69471c | pbrook | dest = tmp >> 16; \
|
1154 | ad69471c | pbrook | } while(0) |
1155 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) |
1156 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
|
1157 | ad69471c | pbrook | #undef NEON_FN
|
1158 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) |
1159 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
|
1160 | ad69471c | pbrook | #undef NEON_FN
|
1161 | ad69471c | pbrook | #undef NEON_QDMULH16
|
1162 | ad69471c | pbrook | |
1163 | ad69471c | pbrook | #define NEON_QDMULH32(dest, src1, src2, round) do { \ |
1164 | ad69471c | pbrook | uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ |
1165 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ |
1166 | ad69471c | pbrook | SET_QC(); \ |
1167 | ad69471c | pbrook | tmp = (tmp >> 63) ^ ~SIGNBIT64; \
|
1168 | ad69471c | pbrook | } else { \
|
1169 | ad69471c | pbrook | tmp <<= 1; \
|
1170 | ad69471c | pbrook | } \ |
1171 | ad69471c | pbrook | if (round) { \
|
1172 | ad69471c | pbrook | int64_t old = tmp; \ |
1173 | ad69471c | pbrook | tmp += (int64_t)1 << 31; \ |
1174 | ad69471c | pbrook | if ((int64_t)tmp < old) { \
|
1175 | ad69471c | pbrook | SET_QC(); \ |
1176 | ad69471c | pbrook | tmp = SIGNBIT64 - 1; \
|
1177 | ad69471c | pbrook | } \ |
1178 | ad69471c | pbrook | } \ |
1179 | ad69471c | pbrook | dest = tmp >> 32; \
|
1180 | ad69471c | pbrook | } while(0) |
1181 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) |
1182 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
|
1183 | ad69471c | pbrook | #undef NEON_FN
|
1184 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) |
1185 | 02da0b2d | Peter Maydell | NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
|
1186 | ad69471c | pbrook | #undef NEON_FN
|
1187 | ad69471c | pbrook | #undef NEON_QDMULH32
|
1188 | ad69471c | pbrook | |
1189 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u8)(uint64_t x) |
1190 | ad69471c | pbrook | { |
1191 | ad69471c | pbrook | return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) |
1192 | ad69471c | pbrook | | ((x >> 24) & 0xff000000u); |
1193 | ad69471c | pbrook | } |
1194 | ad69471c | pbrook | |
1195 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u16)(uint64_t x) |
1196 | ad69471c | pbrook | { |
1197 | ad69471c | pbrook | return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); |
1198 | ad69471c | pbrook | } |
1199 | ad69471c | pbrook | |
1200 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) |
1201 | ad69471c | pbrook | { |
1202 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1203 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1204 | ad69471c | pbrook | } |
1205 | ad69471c | pbrook | |
1206 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) |
1207 | ad69471c | pbrook | { |
1208 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1209 | ad69471c | pbrook | } |
1210 | ad69471c | pbrook | |
/* Rounding variant of the high-byte narrow: add 0x80 to every 16-bit
 * lane of x and return the high byte of each lane, packed into 32 bits.
 */
uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
{
    uint64_t rounded;
    uint32_t packed = 0;
    int lane;

    /* Bits 0..6 of every lane are cleared first, so a carry out of one
     * lane only reaches bit 0 of the next and cannot disturb its high byte.
     */
    rounded = (x & 0xff80ff80ff80ff80ull) + 0x0080008000800080ull;

    for (lane = 0; lane < 4; lane++) {
        uint32_t byte = (rounded >> (lane * 16 + 8)) & 0xff;

        packed |= byte << (lane * 8);
    }
    return packed;
}
1218 | ad69471c | pbrook | |
/* Rounding variant of the high-half narrow: add 0x8000 to each 32-bit
 * lane of x and return the high 16 bits of each lane, packed into 32 bits.
 */
uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
{
    /* Clearing bits 0..14 of each lane keeps a carry out of the low lane
     * from affecting the upper half of the high lane.
     */
    uint64_t rounded = (x & 0xffff8000ffff8000ull) + 0x0000800000008000ull;
    uint32_t lo = (uint16_t)(rounded >> 16);
    uint32_t hi = (uint16_t)(rounded >> 48);

    return lo | (hi << 16);
}
1225 | ad69471c | pbrook | |
1226 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x) |
1227 | af1bbf30 | Juha Riihimäki | { |
1228 | af1bbf30 | Juha Riihimäki | uint16_t s; |
1229 | af1bbf30 | Juha Riihimäki | uint8_t d; |
1230 | af1bbf30 | Juha Riihimäki | uint32_t res = 0;
|
1231 | af1bbf30 | Juha Riihimäki | #define SAT8(n) \
|
1232 | af1bbf30 | Juha Riihimäki | s = x >> n; \ |
1233 | af1bbf30 | Juha Riihimäki | if (s & 0x8000) { \ |
1234 | af1bbf30 | Juha Riihimäki | SET_QC(); \ |
1235 | af1bbf30 | Juha Riihimäki | } else { \
|
1236 | af1bbf30 | Juha Riihimäki | if (s > 0xff) { \ |
1237 | af1bbf30 | Juha Riihimäki | d = 0xff; \
|
1238 | af1bbf30 | Juha Riihimäki | SET_QC(); \ |
1239 | af1bbf30 | Juha Riihimäki | } else { \
|
1240 | af1bbf30 | Juha Riihimäki | d = s; \ |
1241 | af1bbf30 | Juha Riihimäki | } \ |
1242 | af1bbf30 | Juha Riihimäki | res |= (uint32_t)d << (n / 2); \
|
1243 | af1bbf30 | Juha Riihimäki | } |
1244 | af1bbf30 | Juha Riihimäki | |
1245 | af1bbf30 | Juha Riihimäki | SAT8(0);
|
1246 | af1bbf30 | Juha Riihimäki | SAT8(16);
|
1247 | af1bbf30 | Juha Riihimäki | SAT8(32);
|
1248 | af1bbf30 | Juha Riihimäki | SAT8(48);
|
1249 | af1bbf30 | Juha Riihimäki | #undef SAT8
|
1250 | af1bbf30 | Juha Riihimäki | return res;
|
1251 | af1bbf30 | Juha Riihimäki | } |
1252 | af1bbf30 | Juha Riihimäki | |
1253 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) |
1254 | ad69471c | pbrook | { |
1255 | ad69471c | pbrook | uint16_t s; |
1256 | ad69471c | pbrook | uint8_t d; |
1257 | ad69471c | pbrook | uint32_t res = 0;
|
1258 | ad69471c | pbrook | #define SAT8(n) \
|
1259 | ad69471c | pbrook | s = x >> n; \ |
1260 | ad69471c | pbrook | if (s > 0xff) { \ |
1261 | ad69471c | pbrook | d = 0xff; \
|
1262 | ad69471c | pbrook | SET_QC(); \ |
1263 | ad69471c | pbrook | } else { \
|
1264 | ad69471c | pbrook | d = s; \ |
1265 | ad69471c | pbrook | } \ |
1266 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1267 | ad69471c | pbrook | |
1268 | ad69471c | pbrook | SAT8(0);
|
1269 | ad69471c | pbrook | SAT8(16);
|
1270 | ad69471c | pbrook | SAT8(32);
|
1271 | ad69471c | pbrook | SAT8(48);
|
1272 | ad69471c | pbrook | #undef SAT8
|
1273 | ad69471c | pbrook | return res;
|
1274 | ad69471c | pbrook | } |
1275 | ad69471c | pbrook | |
1276 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) |
1277 | ad69471c | pbrook | { |
1278 | ad69471c | pbrook | int16_t s; |
1279 | ad69471c | pbrook | uint8_t d; |
1280 | ad69471c | pbrook | uint32_t res = 0;
|
1281 | ad69471c | pbrook | #define SAT8(n) \
|
1282 | ad69471c | pbrook | s = x >> n; \ |
1283 | ad69471c | pbrook | if (s != (int8_t)s) { \
|
1284 | ad69471c | pbrook | d = (s >> 15) ^ 0x7f; \ |
1285 | ad69471c | pbrook | SET_QC(); \ |
1286 | ad69471c | pbrook | } else { \
|
1287 | ad69471c | pbrook | d = s; \ |
1288 | ad69471c | pbrook | } \ |
1289 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1290 | ad69471c | pbrook | |
1291 | ad69471c | pbrook | SAT8(0);
|
1292 | ad69471c | pbrook | SAT8(16);
|
1293 | ad69471c | pbrook | SAT8(32);
|
1294 | ad69471c | pbrook | SAT8(48);
|
1295 | ad69471c | pbrook | #undef SAT8
|
1296 | ad69471c | pbrook | return res;
|
1297 | ad69471c | pbrook | } |
1298 | ad69471c | pbrook | |
1299 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x) |
1300 | af1bbf30 | Juha Riihimäki | { |
1301 | af1bbf30 | Juha Riihimäki | uint32_t high; |
1302 | af1bbf30 | Juha Riihimäki | uint32_t low; |
1303 | af1bbf30 | Juha Riihimäki | low = x; |
1304 | af1bbf30 | Juha Riihimäki | if (low & 0x80000000) { |
1305 | af1bbf30 | Juha Riihimäki | low = 0;
|
1306 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1307 | af1bbf30 | Juha Riihimäki | } else if (low > 0xffff) { |
1308 | af1bbf30 | Juha Riihimäki | low = 0xffff;
|
1309 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1310 | af1bbf30 | Juha Riihimäki | } |
1311 | af1bbf30 | Juha Riihimäki | high = x >> 32;
|
1312 | af1bbf30 | Juha Riihimäki | if (high & 0x80000000) { |
1313 | af1bbf30 | Juha Riihimäki | high = 0;
|
1314 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1315 | af1bbf30 | Juha Riihimäki | } else if (high > 0xffff) { |
1316 | af1bbf30 | Juha Riihimäki | high = 0xffff;
|
1317 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1318 | af1bbf30 | Juha Riihimäki | } |
1319 | af1bbf30 | Juha Riihimäki | return low | (high << 16); |
1320 | af1bbf30 | Juha Riihimäki | } |
1321 | af1bbf30 | Juha Riihimäki | |
1322 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) |
1323 | ad69471c | pbrook | { |
1324 | ad69471c | pbrook | uint32_t high; |
1325 | ad69471c | pbrook | uint32_t low; |
1326 | ad69471c | pbrook | low = x; |
1327 | ad69471c | pbrook | if (low > 0xffff) { |
1328 | ad69471c | pbrook | low = 0xffff;
|
1329 | ad69471c | pbrook | SET_QC(); |
1330 | ad69471c | pbrook | } |
1331 | ad69471c | pbrook | high = x >> 32;
|
1332 | ad69471c | pbrook | if (high > 0xffff) { |
1333 | ad69471c | pbrook | high = 0xffff;
|
1334 | ad69471c | pbrook | SET_QC(); |
1335 | ad69471c | pbrook | } |
1336 | ad69471c | pbrook | return low | (high << 16); |
1337 | ad69471c | pbrook | } |
1338 | ad69471c | pbrook | |
1339 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) |
1340 | ad69471c | pbrook | { |
1341 | ad69471c | pbrook | int32_t low; |
1342 | ad69471c | pbrook | int32_t high; |
1343 | ad69471c | pbrook | low = x; |
1344 | ad69471c | pbrook | if (low != (int16_t)low) {
|
1345 | ad69471c | pbrook | low = (low >> 31) ^ 0x7fff; |
1346 | ad69471c | pbrook | SET_QC(); |
1347 | ad69471c | pbrook | } |
1348 | ad69471c | pbrook | high = x >> 32;
|
1349 | ad69471c | pbrook | if (high != (int16_t)high) {
|
1350 | ad69471c | pbrook | high = (high >> 31) ^ 0x7fff; |
1351 | ad69471c | pbrook | SET_QC(); |
1352 | ad69471c | pbrook | } |
1353 | ad69471c | pbrook | return (uint16_t)low | (high << 16); |
1354 | ad69471c | pbrook | } |
1355 | ad69471c | pbrook | |
1356 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x) |
1357 | af1bbf30 | Juha Riihimäki | { |
1358 | af1bbf30 | Juha Riihimäki | if (x & 0x8000000000000000ull) { |
1359 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1360 | af1bbf30 | Juha Riihimäki | return 0; |
1361 | af1bbf30 | Juha Riihimäki | } |
1362 | af1bbf30 | Juha Riihimäki | if (x > 0xffffffffu) { |
1363 | af1bbf30 | Juha Riihimäki | SET_QC(); |
1364 | af1bbf30 | Juha Riihimäki | return 0xffffffffu; |
1365 | af1bbf30 | Juha Riihimäki | } |
1366 | af1bbf30 | Juha Riihimäki | return x;
|
1367 | af1bbf30 | Juha Riihimäki | } |
1368 | af1bbf30 | Juha Riihimäki | |
1369 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) |
1370 | ad69471c | pbrook | { |
1371 | ad69471c | pbrook | if (x > 0xffffffffu) { |
1372 | ad69471c | pbrook | SET_QC(); |
1373 | ad69471c | pbrook | return 0xffffffffu; |
1374 | ad69471c | pbrook | } |
1375 | ad69471c | pbrook | return x;
|
1376 | ad69471c | pbrook | } |
1377 | ad69471c | pbrook | |
1378 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) |
1379 | ad69471c | pbrook | { |
1380 | ad69471c | pbrook | if ((int64_t)x != (int32_t)x) {
|
1381 | ad69471c | pbrook | SET_QC(); |
1382 | cc2212c2 | Peter Maydell | return ((int64_t)x >> 63) ^ 0x7fffffff; |
1383 | ad69471c | pbrook | } |
1384 | ad69471c | pbrook | return x;
|
1385 | ad69471c | pbrook | } |
1386 | ad69471c | pbrook | |
/* Zero-extend the four bytes of x into four 16-bit lanes. */
uint64_t HELPER(neon_widen_u8)(uint32_t x)
{
    uint64_t widened = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint64_t elem = (uint8_t)(x >> (lane * 8));

        widened |= elem << (lane * 16);
    }
    return widened;
}
1400 | ad69471c | pbrook | |
/* Sign-extend the four bytes of x into four 16-bit lanes. */
uint64_t HELPER(neon_widen_s8)(uint32_t x)
{
    uint64_t widened = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* int8_t -> uint16_t sign-extends to exactly 16 bits. */
        uint64_t elem = (uint16_t)(int8_t)(x >> (lane * 8));

        widened |= elem << (lane * 16);
    }
    return widened;
}
1414 | ad69471c | pbrook | |
/* Zero-extend the two 16-bit halves of x into two 32-bit lanes. */
uint64_t HELPER(neon_widen_u16)(uint32_t x)
{
    uint64_t lo = x & 0xffff;
    uint64_t hi = x >> 16;

    return (hi << 32) | lo;
}
1420 | ad69471c | pbrook | |
/* Sign-extend the two 16-bit halves of x into two 32-bit lanes. */
uint64_t HELPER(neon_widen_s16)(uint32_t x)
{
    uint32_t lo = (uint32_t)(int16_t)x;
    uint32_t hi = (uint32_t)(int16_t)(x >> 16);

    return ((uint64_t)hi << 32) | lo;
}
1426 | ad69471c | pbrook | |
/* Add four independent 16-bit lanes of a and b (modulo 2^16 per lane). */
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
{
    const uint64_t msb = 0x8000800080008000ull;
    /* Sum of the top bits without carry-in; folded back in at the end. */
    uint64_t top = (a ^ b) & msb;
    /* With the top bit of each lane cleared, no carry can cross lanes. */
    uint64_t sum = (a & ~msb) + (b & ~msb);

    return sum ^ top;
}
1435 | ad69471c | pbrook | |
/* Add two independent 32-bit lanes of a and b (modulo 2^32 per lane). */
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
{
    const uint64_t msb = 0x8000000080000000ull;
    /* Sum of the top bits without carry-in; folded back in at the end. */
    uint64_t top = (a ^ b) & msb;
    /* With the top bit of each lane cleared, no carry can cross lanes. */
    uint64_t sum = (a & ~msb) + (b & ~msb);

    return sum ^ top;
}
1444 | ad69471c | pbrook | |
/* Pairwise add of 16-bit lanes (modulo 2^16).  Result lanes 0 and 1 are
 * the sums of adjacent lane pairs of a; result lanes 2 and 3 are the
 * sums of adjacent lane pairs of b.
 */
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
{
    uint64_t r0 = (uint16_t)((uint16_t)a + (uint16_t)(a >> 16));
    uint64_t r1 = (uint16_t)((uint16_t)(a >> 32) + (uint16_t)(a >> 48));
    uint64_t r2 = (uint16_t)((uint16_t)b + (uint16_t)(b >> 16));
    uint64_t r3 = (uint16_t)((uint16_t)(b >> 32) + (uint16_t)(b >> 48));

    return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
}
1459 | ad69471c | pbrook | |
/* Pairwise add of 32-bit lanes (modulo 2^32): the low result lane is the
 * sum of both halves of a, the high result lane the sum of both halves of b.
 */
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
{
    uint32_t sum_a = (uint32_t)a + (uint32_t)(a >> 32);
    uint32_t sum_b = (uint32_t)b + (uint32_t)(b >> 32);

    return ((uint64_t)sum_b << 32) | sum_a;
}
1466 | ad69471c | pbrook | |
/* Subtract four independent 16-bit lanes, a - b (modulo 2^16 per lane). */
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
{
    const uint64_t msb = 0x8000800080008000ull;
    /* Correction for the top bit of each lane, applied after the subtract. */
    uint64_t fixup = (a ^ ~b) & msb;
    /* Forcing a's top bits on and b's off keeps borrows inside each lane. */
    uint64_t diff = (a | msb) - (b & ~msb);

    return diff ^ fixup;
}
1475 | ad69471c | pbrook | |
/* Subtract two independent 32-bit lanes, a - b (modulo 2^32 per lane). */
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
{
    const uint64_t msb = 0x8000000080000000ull;
    /* Correction for the top bit of each lane, applied after the subtract. */
    uint64_t fixup = (a ^ ~b) & msb;
    /* Forcing a's top bits on and b's off keeps borrows inside each lane. */
    uint64_t diff = (a | msb) - (b & ~msb);

    return diff ^ fixup;
}
1484 | ad69471c | pbrook | |
1485 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) |
1486 | ad69471c | pbrook | { |
1487 | ad69471c | pbrook | uint32_t x, y; |
1488 | ad69471c | pbrook | uint32_t low, high; |
1489 | ad69471c | pbrook | |
1490 | ad69471c | pbrook | x = a; |
1491 | ad69471c | pbrook | y = b; |
1492 | ad69471c | pbrook | low = x + y; |
1493 | ad69471c | pbrook | if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1494 | ad69471c | pbrook | SET_QC(); |
1495 | ad69471c | pbrook | low = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1496 | ad69471c | pbrook | } |
1497 | ad69471c | pbrook | x = a >> 32;
|
1498 | ad69471c | pbrook | y = b >> 32;
|
1499 | ad69471c | pbrook | high = x + y; |
1500 | ad69471c | pbrook | if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1501 | ad69471c | pbrook | SET_QC(); |
1502 | ad69471c | pbrook | high = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1503 | ad69471c | pbrook | } |
1504 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1505 | ad69471c | pbrook | } |
1506 | ad69471c | pbrook | |
1507 | 02da0b2d | Peter Maydell | uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) |
1508 | ad69471c | pbrook | { |
1509 | ad69471c | pbrook | uint64_t result; |
1510 | ad69471c | pbrook | |
1511 | ad69471c | pbrook | result = a + b; |
1512 | ad69471c | pbrook | if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
|
1513 | ad69471c | pbrook | SET_QC(); |
1514 | ad69471c | pbrook | result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
|
1515 | ad69471c | pbrook | } |
1516 | ad69471c | pbrook | return result;
|
1517 | ad69471c | pbrook | } |
1518 | ad69471c | pbrook | |
/* Absolute difference: dest = |x - y|.
 * Both operands are first truncated to intype and then widened to
 * arithtype.  The arithmetic must be done in a type larger than the
 * input type because, for example, the absolute difference of two
 * signed 32 bit values can overflow a signed 32 bit value.
 */
#define DO_ABD(dest, x, y, intype, arithtype) do {      \
    arithtype abd_lhs = (intype)(x);                    \
    arithtype abd_rhs = (intype)(y);                    \
    if (abd_lhs > abd_rhs) {                            \
        dest = abd_lhs - abd_rhs;                       \
    } else {                                            \
        dest = abd_rhs - abd_lhs;                       \
    }                                                   \
    } while(0)
1528 | ad69471c | pbrook | |
1529 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) |
1530 | ad69471c | pbrook | { |
1531 | ad69471c | pbrook | uint64_t tmp; |
1532 | ad69471c | pbrook | uint64_t result; |
1533 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint8_t, uint32_t); |
1534 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t); |
1535 | ad69471c | pbrook | result |= tmp << 16;
|
1536 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t); |
1537 | ad69471c | pbrook | result |= tmp << 32;
|
1538 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t); |
1539 | ad69471c | pbrook | result |= tmp << 48;
|
1540 | ad69471c | pbrook | return result;
|
1541 | ad69471c | pbrook | } |
1542 | ad69471c | pbrook | |
1543 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) |
1544 | ad69471c | pbrook | { |
1545 | ad69471c | pbrook | uint64_t tmp; |
1546 | ad69471c | pbrook | uint64_t result; |
1547 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int8_t, int32_t); |
1548 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t); |
1549 | ad69471c | pbrook | result |= tmp << 16;
|
1550 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t); |
1551 | ad69471c | pbrook | result |= tmp << 32;
|
1552 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t); |
1553 | ad69471c | pbrook | result |= tmp << 48;
|
1554 | ad69471c | pbrook | return result;
|
1555 | ad69471c | pbrook | } |
1556 | ad69471c | pbrook | |
1557 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) |
1558 | ad69471c | pbrook | { |
1559 | ad69471c | pbrook | uint64_t tmp; |
1560 | ad69471c | pbrook | uint64_t result; |
1561 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint16_t, uint32_t); |
1562 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1563 | ad69471c | pbrook | return result | (tmp << 32); |
1564 | ad69471c | pbrook | } |
1565 | ad69471c | pbrook | |
1566 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) |
1567 | ad69471c | pbrook | { |
1568 | ad69471c | pbrook | uint64_t tmp; |
1569 | ad69471c | pbrook | uint64_t result; |
1570 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int16_t, int32_t); |
1571 | 4d9ad7f7 | Peter Maydell | DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t); |
1572 | ad69471c | pbrook | return result | (tmp << 32); |
1573 | ad69471c | pbrook | } |
1574 | ad69471c | pbrook | |
1575 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) |
1576 | ad69471c | pbrook | { |
1577 | ad69471c | pbrook | uint64_t result; |
1578 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, uint32_t, uint64_t); |
1579 | ad69471c | pbrook | return result;
|
1580 | ad69471c | pbrook | } |
1581 | ad69471c | pbrook | |
1582 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) |
1583 | ad69471c | pbrook | { |
1584 | ad69471c | pbrook | uint64_t result; |
1585 | 4d9ad7f7 | Peter Maydell | DO_ABD(result, a, b, int32_t, int64_t); |
1586 | ad69471c | pbrook | return result;
|
1587 | ad69471c | pbrook | } |
1588 | ad69471c | pbrook | #undef DO_ABD
|
1589 | ad69471c | pbrook | |
/* Widening multiply. Named type is the source type.
 * x and y are truncated to type1 (the source element type), converted
 * to type2 (the result element type), multiplied, and the product is
 * truncated to type2 before being stored in dest.
 *
 * The multiplication itself is done in uint64_t.  Multiplying the
 * (type2) values directly would promote a 16-bit type2 to int, and
 * 0xffff * 0xffff (e.g. -1 * -1 for signed byte sources) overflows a
 * signed int, which is undefined behaviour.  Unsigned 64-bit arithmetic
 * wraps and leaves the low bits of the product unchanged.
 */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 tmp_x = (x); \
    type1 tmp_y = (y); \
    dest = (type2)((uint64_t)(type2)tmp_x * (uint64_t)(type2)tmp_y); \
    } while(0)
1596 | ad69471c | pbrook | |
1597 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) |
1598 | ad69471c | pbrook | { |
1599 | ad69471c | pbrook | uint64_t tmp; |
1600 | ad69471c | pbrook | uint64_t result; |
1601 | ad69471c | pbrook | |
1602 | ad69471c | pbrook | DO_MULL(result, a, b, uint8_t, uint16_t); |
1603 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); |
1604 | ad69471c | pbrook | result |= tmp << 16;
|
1605 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); |
1606 | ad69471c | pbrook | result |= tmp << 32;
|
1607 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); |
1608 | ad69471c | pbrook | result |= tmp << 48;
|
1609 | ad69471c | pbrook | return result;
|
1610 | ad69471c | pbrook | } |
1611 | ad69471c | pbrook | |
1612 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) |
1613 | ad69471c | pbrook | { |
1614 | ad69471c | pbrook | uint64_t tmp; |
1615 | ad69471c | pbrook | uint64_t result; |
1616 | ad69471c | pbrook | |
1617 | ad69471c | pbrook | DO_MULL(result, a, b, int8_t, uint16_t); |
1618 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); |
1619 | ad69471c | pbrook | result |= tmp << 16;
|
1620 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); |
1621 | ad69471c | pbrook | result |= tmp << 32;
|
1622 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); |
1623 | ad69471c | pbrook | result |= tmp << 48;
|
1624 | ad69471c | pbrook | return result;
|
1625 | ad69471c | pbrook | } |
1626 | ad69471c | pbrook | |
1627 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) |
1628 | ad69471c | pbrook | { |
1629 | ad69471c | pbrook | uint64_t tmp; |
1630 | ad69471c | pbrook | uint64_t result; |
1631 | ad69471c | pbrook | |
1632 | ad69471c | pbrook | DO_MULL(result, a, b, uint16_t, uint32_t); |
1633 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1634 | ad69471c | pbrook | return result | (tmp << 32); |
1635 | ad69471c | pbrook | } |
1636 | ad69471c | pbrook | |
1637 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) |
1638 | ad69471c | pbrook | { |
1639 | ad69471c | pbrook | uint64_t tmp; |
1640 | ad69471c | pbrook | uint64_t result; |
1641 | ad69471c | pbrook | |
1642 | ad69471c | pbrook | DO_MULL(result, a, b, int16_t, uint32_t); |
1643 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); |
1644 | ad69471c | pbrook | return result | (tmp << 32); |
1645 | ad69471c | pbrook | } |
1646 | ad69471c | pbrook | |
/* Negate each of the four 16-bit lanes of x (modulo 2^16 per lane). */
uint64_t HELPER(neon_negl_u16)(uint64_t x)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* Truncation to 16 bits discards whatever the higher lanes add. */
        uint16_t negated = -(x >> (lane * 16));

        result |= (uint64_t)negated << (lane * 16);
    }
    return result;
}
1660 | ad69471c | pbrook | |
/* Negate each of the two 32-bit lanes of x (modulo 2^32 per lane). */
uint64_t HELPER(neon_negl_u32)(uint64_t x)
{
    uint32_t neg_lo = -(uint32_t)x;
    uint32_t neg_hi = -(uint32_t)(x >> 32);

    return ((uint64_t)neg_hi << 32) | neg_lo;
}
1667 | ad69471c | pbrook | |
/* Two's-complement negation of a 64-bit value.
 * FIXME: There should be a native op for this.
 */
uint64_t HELPER(neon_negl_u64)(uint64_t x)
{
    return ~x + 1;
}
1673 | ad69471c | pbrook | |
1674 | ad69471c | pbrook | /* Saturating sign manipulation. */
|
1675 | ad69471c | pbrook | /* ??? Make these use NEON_VOP1 */
|
1676 | ad69471c | pbrook | #define DO_QABS8(x) do { \ |
1677 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1678 | ad69471c | pbrook | x = 0x7f; \
|
1679 | ad69471c | pbrook | SET_QC(); \ |
1680 | ad69471c | pbrook | } else if (x < 0) { \ |
1681 | ad69471c | pbrook | x = -x; \ |
1682 | ad69471c | pbrook | }} while (0) |
1683 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) |
1684 | ad69471c | pbrook | { |
1685 | ad69471c | pbrook | neon_s8 vec; |
1686 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1687 | ad69471c | pbrook | DO_QABS8(vec.v1); |
1688 | ad69471c | pbrook | DO_QABS8(vec.v2); |
1689 | ad69471c | pbrook | DO_QABS8(vec.v3); |
1690 | ad69471c | pbrook | DO_QABS8(vec.v4); |
1691 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1692 | ad69471c | pbrook | return x;
|
1693 | ad69471c | pbrook | } |
1694 | ad69471c | pbrook | #undef DO_QABS8
|
1695 | ad69471c | pbrook | |
1696 | ad69471c | pbrook | #define DO_QNEG8(x) do { \ |
1697 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1698 | ad69471c | pbrook | x = 0x7f; \
|
1699 | ad69471c | pbrook | SET_QC(); \ |
1700 | ad69471c | pbrook | } else { \
|
1701 | ad69471c | pbrook | x = -x; \ |
1702 | ad69471c | pbrook | }} while (0) |
1703 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) |
1704 | ad69471c | pbrook | { |
1705 | ad69471c | pbrook | neon_s8 vec; |
1706 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1707 | ad69471c | pbrook | DO_QNEG8(vec.v1); |
1708 | ad69471c | pbrook | DO_QNEG8(vec.v2); |
1709 | ad69471c | pbrook | DO_QNEG8(vec.v3); |
1710 | ad69471c | pbrook | DO_QNEG8(vec.v4); |
1711 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1712 | ad69471c | pbrook | return x;
|
1713 | ad69471c | pbrook | } |
1714 | ad69471c | pbrook | #undef DO_QNEG8
|
1715 | ad69471c | pbrook | |
1716 | ad69471c | pbrook | #define DO_QABS16(x) do { \ |
1717 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1718 | ad69471c | pbrook | x = 0x7fff; \
|
1719 | ad69471c | pbrook | SET_QC(); \ |
1720 | ad69471c | pbrook | } else if (x < 0) { \ |
1721 | ad69471c | pbrook | x = -x; \ |
1722 | ad69471c | pbrook | }} while (0) |
1723 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) |
1724 | ad69471c | pbrook | { |
1725 | ad69471c | pbrook | neon_s16 vec; |
1726 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1727 | ad69471c | pbrook | DO_QABS16(vec.v1); |
1728 | ad69471c | pbrook | DO_QABS16(vec.v2); |
1729 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1730 | ad69471c | pbrook | return x;
|
1731 | ad69471c | pbrook | } |
1732 | ad69471c | pbrook | #undef DO_QABS16
|
1733 | ad69471c | pbrook | |
1734 | ad69471c | pbrook | #define DO_QNEG16(x) do { \ |
1735 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1736 | ad69471c | pbrook | x = 0x7fff; \
|
1737 | ad69471c | pbrook | SET_QC(); \ |
1738 | ad69471c | pbrook | } else { \
|
1739 | ad69471c | pbrook | x = -x; \ |
1740 | ad69471c | pbrook | }} while (0) |
1741 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) |
1742 | ad69471c | pbrook | { |
1743 | ad69471c | pbrook | neon_s16 vec; |
1744 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1745 | ad69471c | pbrook | DO_QNEG16(vec.v1); |
1746 | ad69471c | pbrook | DO_QNEG16(vec.v2); |
1747 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1748 | ad69471c | pbrook | return x;
|
1749 | ad69471c | pbrook | } |
1750 | ad69471c | pbrook | #undef DO_QNEG16
|
1751 | ad69471c | pbrook | |
1752 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) |
1753 | ad69471c | pbrook | { |
1754 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1755 | ad69471c | pbrook | SET_QC(); |
1756 | ad69471c | pbrook | x = ~SIGNBIT; |
1757 | ad69471c | pbrook | } else if ((int32_t)x < 0) { |
1758 | ad69471c | pbrook | x = -x; |
1759 | ad69471c | pbrook | } |
1760 | ad69471c | pbrook | return x;
|
1761 | ad69471c | pbrook | } |
1762 | ad69471c | pbrook | |
1763 | 02da0b2d | Peter Maydell | uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) |
1764 | ad69471c | pbrook | { |
1765 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1766 | ad69471c | pbrook | SET_QC(); |
1767 | ad69471c | pbrook | x = ~SIGNBIT; |
1768 | ad69471c | pbrook | } else {
|
1769 | ad69471c | pbrook | x = -x; |
1770 | ad69471c | pbrook | } |
1771 | ad69471c | pbrook | return x;
|
1772 | ad69471c | pbrook | } |
1773 | ad69471c | pbrook | |
1774 | ad69471c | pbrook | /* NEON Float helpers. */
|
1775 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1776 | ad69471c | pbrook | { |
1777 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1778 | aa47cfdd | Peter Maydell | return float32_val(float32_min(make_float32(a), make_float32(b), fpst));
|
1779 | ad69471c | pbrook | } |
1780 | ad69471c | pbrook | |
1781 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1782 | ad69471c | pbrook | { |
1783 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1784 | aa47cfdd | Peter Maydell | return float32_val(float32_max(make_float32(a), make_float32(b), fpst));
|
1785 | ad69471c | pbrook | } |
1786 | ad69471c | pbrook | |
1787 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1788 | ad69471c | pbrook | { |
1789 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1790 | 51d85267 | Peter Maydell | float32 f0 = make_float32(a); |
1791 | 51d85267 | Peter Maydell | float32 f1 = make_float32(b); |
1792 | aa47cfdd | Peter Maydell | return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
|
1793 | ad69471c | pbrook | } |
1794 | ad69471c | pbrook | |
1795 | cab565c4 | Peter Maydell | /* Floating point comparisons produce an integer result.
|
1796 | cab565c4 | Peter Maydell | * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
|
1797 | cab565c4 | Peter Maydell | * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
|
1798 | cab565c4 | Peter Maydell | */
|
1799 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1800 | cab565c4 | Peter Maydell | { |
1801 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1802 | aa47cfdd | Peter Maydell | return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
|
1803 | cab565c4 | Peter Maydell | } |
1804 | cab565c4 | Peter Maydell | |
1805 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1806 | cab565c4 | Peter Maydell | { |
1807 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1808 | aa47cfdd | Peter Maydell | return -float32_le(make_float32(b), make_float32(a), fpst);
|
1809 | ad69471c | pbrook | } |
1810 | ad69471c | pbrook | |
1811 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1812 | cab565c4 | Peter Maydell | { |
1813 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1814 | aa47cfdd | Peter Maydell | return -float32_lt(make_float32(b), make_float32(a), fpst);
|
1815 | cab565c4 | Peter Maydell | } |
1816 | ad69471c | pbrook | |
1817 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1818 | ad69471c | pbrook | { |
1819 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1820 | 51d85267 | Peter Maydell | float32 f0 = float32_abs(make_float32(a)); |
1821 | 51d85267 | Peter Maydell | float32 f1 = float32_abs(make_float32(b)); |
1822 | aa47cfdd | Peter Maydell | return -float32_le(f1, f0, fpst);
|
1823 | ad69471c | pbrook | } |
1824 | ad69471c | pbrook | |
1825 | aa47cfdd | Peter Maydell | uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
|
1826 | ad69471c | pbrook | { |
1827 | aa47cfdd | Peter Maydell | float_status *fpst = fpstp; |
1828 | 51d85267 | Peter Maydell | float32 f0 = float32_abs(make_float32(a)); |
1829 | 51d85267 | Peter Maydell | float32 f1 = float32_abs(make_float32(b)); |
1830 | aa47cfdd | Peter Maydell | return -float32_lt(f1, f0, fpst);
|
1831 | ad69471c | pbrook | } |
1832 | 02acedf9 | Peter Maydell | |
/* Extract element N of V, where elements are SIZE bits wide and element
 * 0 is the least significant.  SIZE must be less than 64.
 */
#define ELEM(V, N, SIZE) \
    (((V) >> ((SIZE) * (N))) & ((1ull << (SIZE)) - 1))
1834 | 02acedf9 | Peter Maydell | |
1835 | 02da0b2d | Peter Maydell | void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1836 | 02acedf9 | Peter Maydell | { |
1837 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1838 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1839 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1840 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1841 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) |
1842 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) |
1843 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) |
1844 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); |
1845 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) |
1846 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) |
1847 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1848 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); |
1849 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) |
1850 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) |
1851 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) |
1852 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); |
1853 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) |
1854 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) |
1855 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) |
1856 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1857 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1858 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1859 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1860 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1861 | 02acedf9 | Peter Maydell | } |
1862 | 02acedf9 | Peter Maydell | |
1863 | 02da0b2d | Peter Maydell | void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1864 | 02acedf9 | Peter Maydell | { |
1865 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1866 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1867 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1868 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1869 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) |
1870 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); |
1871 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) |
1872 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); |
1873 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) |
1874 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); |
1875 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) |
1876 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1877 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1878 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1879 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1880 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1881 | 02acedf9 | Peter Maydell | } |
1882 | 02acedf9 | Peter Maydell | |
1883 | 02da0b2d | Peter Maydell | void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
1884 | 02acedf9 | Peter Maydell | { |
1885 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1886 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1887 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1888 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1889 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); |
1890 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1891 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); |
1892 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1893 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1894 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1895 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1896 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1897 | 02acedf9 | Peter Maydell | } |
1898 | 02acedf9 | Peter Maydell | |
1899 | 02da0b2d | Peter Maydell | void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1900 | 02acedf9 | Peter Maydell | { |
1901 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1902 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1903 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) |
1904 | 02acedf9 | Peter Maydell | | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) |
1905 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1906 | 02acedf9 | Peter Maydell | | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); |
1907 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) |
1908 | 02acedf9 | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) |
1909 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) |
1910 | 02acedf9 | Peter Maydell | | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
1911 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1912 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1913 | 02acedf9 | Peter Maydell | } |
1914 | 02acedf9 | Peter Maydell | |
1915 | 02da0b2d | Peter Maydell | void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1916 | 02acedf9 | Peter Maydell | { |
1917 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1918 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1919 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) |
1920 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); |
1921 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) |
1922 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
1923 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1924 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1925 | 02acedf9 | Peter Maydell | } |
1926 | d68a6f3a | Peter Maydell | |
1927 | 02da0b2d | Peter Maydell | void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1928 | d68a6f3a | Peter Maydell | { |
1929 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1930 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1931 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1932 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1933 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) |
1934 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) |
1935 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) |
1936 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); |
1937 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) |
1938 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) |
1939 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) |
1940 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); |
1941 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) |
1942 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) |
1943 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1944 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); |
1945 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) |
1946 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) |
1947 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) |
1948 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1949 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1950 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1951 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1952 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1953 | d68a6f3a | Peter Maydell | } |
1954 | d68a6f3a | Peter Maydell | |
1955 | 02da0b2d | Peter Maydell | void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1956 | d68a6f3a | Peter Maydell | { |
1957 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1958 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1959 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1960 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1961 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) |
1962 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); |
1963 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) |
1964 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); |
1965 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) |
1966 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); |
1967 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) |
1968 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1969 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1970 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1971 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1972 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1973 | d68a6f3a | Peter Maydell | } |
1974 | d68a6f3a | Peter Maydell | |
1975 | 02da0b2d | Peter Maydell | void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
1976 | d68a6f3a | Peter Maydell | { |
1977 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1978 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1979 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1980 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1981 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); |
1982 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); |
1983 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1984 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1985 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1986 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1987 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1988 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1989 | d68a6f3a | Peter Maydell | } |
1990 | d68a6f3a | Peter Maydell | |
1991 | 02da0b2d | Peter Maydell | void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1992 | d68a6f3a | Peter Maydell | { |
1993 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1994 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1995 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) |
1996 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) |
1997 | d68a6f3a | Peter Maydell | | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1998 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); |
1999 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) |
2000 | d68a6f3a | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) |
2001 | d68a6f3a | Peter Maydell | | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) |
2002 | d68a6f3a | Peter Maydell | | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
2003 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2004 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2005 | d68a6f3a | Peter Maydell | } |
2006 | d68a6f3a | Peter Maydell | |
2007 | 02da0b2d | Peter Maydell | void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
2008 | d68a6f3a | Peter Maydell | { |
2009 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
2010 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
2011 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) |
2012 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); |
2013 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) |
2014 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
2015 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2016 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2017 | d68a6f3a | Peter Maydell | } |