root / target-arm / neon_helper.c @ f8bf8606
History | View | Annotate | Download (52.8 kB)
1 | e677137d | pbrook | /*
|
---|---|---|---|
2 | e677137d | pbrook | * ARM NEON vector operations.
|
3 | e677137d | pbrook | *
|
4 | e677137d | pbrook | * Copyright (c) 2007, 2008 CodeSourcery.
|
5 | e677137d | pbrook | * Written by Paul Brook
|
6 | e677137d | pbrook | *
|
7 | e677137d | pbrook | * This code is licenced under the GNU GPL v2.
|
8 | e677137d | pbrook | */
|
9 | ad69471c | pbrook | #include <stdlib.h> |
10 | ad69471c | pbrook | #include <stdio.h> |
11 | ad69471c | pbrook | |
12 | ad69471c | pbrook | #include "cpu.h" |
13 | ad69471c | pbrook | #include "exec-all.h" |
14 | ad69471c | pbrook | #include "helpers.h" |
15 | ad69471c | pbrook | |
/* Masks selecting the top (sign) bit of a 32-bit and a 64-bit value. */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/*
 * Flag that a saturating operation clipped its result by setting the
 * CPSR_Q bit in the FPSCR word kept in env->vfp.xregs[].  A variable
 * named `env` must be in scope wherever this is used.
 *
 * The bit is OR-ed in rather than assigned: assigning would overwrite
 * every other field stored in the same register word each time a lane
 * saturates.
 */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
20 | ad69471c | pbrook | |
21 | ad69471c | pbrook | static float_status neon_float_status;
|
22 | ad69471c | pbrook | #define NFS &neon_float_status
|
23 | ad69471c | pbrook | |
24 | ad69471c | pbrook | /* Helper routines to perform bitwise copies between float and int. */
|
25 | ad69471c | pbrook | static inline float32 vfp_itos(uint32_t i) |
26 | ad69471c | pbrook | { |
27 | ad69471c | pbrook | union {
|
28 | ad69471c | pbrook | uint32_t i; |
29 | ad69471c | pbrook | float32 s; |
30 | ad69471c | pbrook | } v; |
31 | ad69471c | pbrook | |
32 | ad69471c | pbrook | v.i = i; |
33 | ad69471c | pbrook | return v.s;
|
34 | ad69471c | pbrook | } |
35 | ad69471c | pbrook | |
36 | ad69471c | pbrook | static inline uint32_t vfp_stoi(float32 s) |
37 | ad69471c | pbrook | { |
38 | ad69471c | pbrook | union {
|
39 | ad69471c | pbrook | uint32_t i; |
40 | ad69471c | pbrook | float32 s; |
41 | ad69471c | pbrook | } v; |
42 | ad69471c | pbrook | |
43 | ad69471c | pbrook | v.s = s; |
44 | ad69471c | pbrook | return v.i;
|
45 | ad69471c | pbrook | } |
46 | ad69471c | pbrook | |
/*
 * Lane views of one 32-bit word: four 8-bit lanes, two 16-bit lanes or a
 * single 32-bit lane.  Member v1 always sits at the least significant end
 * of the word, so the declaration order is reversed on big-endian hosts.
 */
#ifdef HOST_WORDS_BIGENDIAN
typedef struct { int8_t v4, v3, v2, v1; } neon_s8;
typedef struct { uint8_t v4, v3, v2, v1; } neon_u8;
typedef struct { int16_t v2, v1; } neon_s16;
typedef struct { uint16_t v2, v1; } neon_u16;
#else
typedef struct { int8_t v1, v2, v3, v4; } neon_s8;
typedef struct { uint8_t v1, v2, v3, v4; } neon_u8;
typedef struct { int16_t v1, v2; } neon_s16;
typedef struct { uint16_t v1, v2; } neon_u16;
#endif
/* A single lane has no ordering to worry about. */
typedef struct { int32_t v1; } neon_s32;
typedef struct { uint32_t v1; } neon_u32;
93 | ad69471c | pbrook | |
/*
 * Reinterpret the 32-bit word `val` as the vector structure type `vtype`
 * and store it in `dest`.  The copy is a union pun, not a conversion.
 */
#define NEON_UNPACK(vtype, dest, val) do { \
    dest = ((union { vtype v; uint32_t i; }){ .i = (val) }).v; \
} while (0)

/*
 * Reinterpret the vector structure `val` (of type `vtype`) as a 32-bit
 * word and store it in `dest`.
 */
#define NEON_PACK(vtype, dest, val) do { \
    dest = ((union { vtype v; uint32_t i; }){ .v = (val) }).i; \
} while (0)
113 | ad69471c | pbrook | |
/*
 * Apply the current NEON_FN lane by lane to vsrc1/vsrc2, writing vdest.
 * Those three variable names are supplied by the macro that expands
 * NEON_DOn.  Each wider variant extends the narrower one.
 */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
124 | ad69471c | pbrook | |
/*
 * Shared body of a two-operand vector helper: split arg1 and arg2 into
 * lanes of `vtype`, run the current NEON_FN over the `n` lanes, then pack
 * the lanes back into the 32-bit return value.
 */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t result; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, result, vdest); \
    return result; \
}

/* Define the helper named from neon_<name>, taking the two packed operands. */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* As NEON_VOP, but the helper also receives `env` for NEON_FN to use. */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
145 | ad69471c | pbrook | |
/*
 * Pairwise operations: NEON_FN combines neighbouring lanes of one operand.
 * The low half of the result comes from vsrc1, the high half from vsrc2.
 * There is no 32-bit variant because a 32-bit segment holds one element,
 * which makes the pairwise and elementwise forms identical.
 */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);

/* Define the pairwise helper named from neon_<name> over `n` lanes. */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t result; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, result, vdest); \
    return result; \
}
171 | ad69471c | pbrook | |
/*
 * Unary operators.  NEON_DOn still hands vsrc2 lanes to NEON_FN, but no
 * vsrc2 exists here, so the NEON_FN in effect must not use its third
 * parameter.
 */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1, vdest; \
    uint32_t result; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, result, vdest); \
    return result; \
}
183 | ad69471c | pbrook | |
184 | ad69471c | pbrook | |
185 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
186 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
187 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
188 | ad69471c | pbrook | SET_QC(); \ |
189 | ad69471c | pbrook | dest = ~0; \
|
190 | ad69471c | pbrook | } else { \
|
191 | ad69471c | pbrook | dest = tmp; \ |
192 | ad69471c | pbrook | }} while(0) |
193 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
194 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u8, neon_u8, 4)
|
195 | ad69471c | pbrook | #undef NEON_FN
|
196 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
197 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u16, neon_u16, 2)
|
198 | ad69471c | pbrook | #undef NEON_FN
|
199 | ad69471c | pbrook | #undef NEON_USAT
|
200 | ad69471c | pbrook | |
201 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b) |
202 | 72902672 | Christophe Lyon | { |
203 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
204 | 72902672 | Christophe Lyon | if (res < a) {
|
205 | 72902672 | Christophe Lyon | SET_QC(); |
206 | 72902672 | Christophe Lyon | res = ~0;
|
207 | 72902672 | Christophe Lyon | } |
208 | 72902672 | Christophe Lyon | return res;
|
209 | 72902672 | Christophe Lyon | } |
210 | 72902672 | Christophe Lyon | |
211 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
212 | 72902672 | Christophe Lyon | { |
213 | 72902672 | Christophe Lyon | uint64_t res; |
214 | 72902672 | Christophe Lyon | |
215 | 72902672 | Christophe Lyon | res = src1 + src2; |
216 | 72902672 | Christophe Lyon | if (res < src1) {
|
217 | 72902672 | Christophe Lyon | SET_QC(); |
218 | 72902672 | Christophe Lyon | res = ~(uint64_t)0;
|
219 | 72902672 | Christophe Lyon | } |
220 | 72902672 | Christophe Lyon | return res;
|
221 | 72902672 | Christophe Lyon | } |
222 | 72902672 | Christophe Lyon | |
223 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
224 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
225 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
226 | ad69471c | pbrook | SET_QC(); \ |
227 | ad69471c | pbrook | if (src2 > 0) { \ |
228 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
229 | ad69471c | pbrook | } else { \
|
230 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
231 | ad69471c | pbrook | } \ |
232 | ad69471c | pbrook | } \ |
233 | ad69471c | pbrook | dest = tmp; \ |
234 | ad69471c | pbrook | } while(0) |
235 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
236 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s8, neon_s8, 4)
|
237 | ad69471c | pbrook | #undef NEON_FN
|
238 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
239 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s16, neon_s16, 2)
|
240 | ad69471c | pbrook | #undef NEON_FN
|
241 | ad69471c | pbrook | #undef NEON_SSAT
|
242 | ad69471c | pbrook | |
243 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b) |
244 | 72902672 | Christophe Lyon | { |
245 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
246 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
|
247 | 72902672 | Christophe Lyon | SET_QC(); |
248 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
249 | 72902672 | Christophe Lyon | } |
250 | 72902672 | Christophe Lyon | return res;
|
251 | 72902672 | Christophe Lyon | } |
252 | 72902672 | Christophe Lyon | |
253 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
254 | 72902672 | Christophe Lyon | { |
255 | 72902672 | Christophe Lyon | uint64_t res; |
256 | 72902672 | Christophe Lyon | |
257 | 72902672 | Christophe Lyon | res = src1 + src2; |
258 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
|
259 | 72902672 | Christophe Lyon | SET_QC(); |
260 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
261 | 72902672 | Christophe Lyon | } |
262 | 72902672 | Christophe Lyon | return res;
|
263 | 72902672 | Christophe Lyon | } |
264 | 72902672 | Christophe Lyon | |
265 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
266 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
267 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
268 | ad69471c | pbrook | SET_QC(); \ |
269 | ad69471c | pbrook | dest = 0; \
|
270 | ad69471c | pbrook | } else { \
|
271 | ad69471c | pbrook | dest = tmp; \ |
272 | ad69471c | pbrook | }} while(0) |
273 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
274 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u8, neon_u8, 4)
|
275 | ad69471c | pbrook | #undef NEON_FN
|
276 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
277 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u16, neon_u16, 2)
|
278 | ad69471c | pbrook | #undef NEON_FN
|
279 | ad69471c | pbrook | #undef NEON_USAT
|
280 | ad69471c | pbrook | |
281 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b) |
282 | 72902672 | Christophe Lyon | { |
283 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
284 | 72902672 | Christophe Lyon | if (res > a) {
|
285 | 72902672 | Christophe Lyon | SET_QC(); |
286 | 72902672 | Christophe Lyon | res = 0;
|
287 | 72902672 | Christophe Lyon | } |
288 | 72902672 | Christophe Lyon | return res;
|
289 | 72902672 | Christophe Lyon | } |
290 | 72902672 | Christophe Lyon | |
291 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
292 | 72902672 | Christophe Lyon | { |
293 | 72902672 | Christophe Lyon | uint64_t res; |
294 | 72902672 | Christophe Lyon | |
295 | 72902672 | Christophe Lyon | if (src1 < src2) {
|
296 | 72902672 | Christophe Lyon | SET_QC(); |
297 | 72902672 | Christophe Lyon | res = 0;
|
298 | 72902672 | Christophe Lyon | } else {
|
299 | 72902672 | Christophe Lyon | res = src1 - src2; |
300 | 72902672 | Christophe Lyon | } |
301 | 72902672 | Christophe Lyon | return res;
|
302 | 72902672 | Christophe Lyon | } |
303 | 72902672 | Christophe Lyon | |
304 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
305 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
306 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
307 | ad69471c | pbrook | SET_QC(); \ |
308 | ad69471c | pbrook | if (src2 < 0) { \ |
309 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
310 | ad69471c | pbrook | } else { \
|
311 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
312 | ad69471c | pbrook | } \ |
313 | ad69471c | pbrook | } \ |
314 | ad69471c | pbrook | dest = tmp; \ |
315 | ad69471c | pbrook | } while(0) |
316 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
317 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s8, neon_s8, 4)
|
318 | ad69471c | pbrook | #undef NEON_FN
|
319 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
320 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s16, neon_s16, 2)
|
321 | ad69471c | pbrook | #undef NEON_FN
|
322 | ad69471c | pbrook | #undef NEON_SSAT
|
323 | ad69471c | pbrook | |
324 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b) |
325 | 72902672 | Christophe Lyon | { |
326 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
327 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
|
328 | 72902672 | Christophe Lyon | SET_QC(); |
329 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
330 | 72902672 | Christophe Lyon | } |
331 | 72902672 | Christophe Lyon | return res;
|
332 | 72902672 | Christophe Lyon | } |
333 | 72902672 | Christophe Lyon | |
334 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
335 | 72902672 | Christophe Lyon | { |
336 | 72902672 | Christophe Lyon | uint64_t res; |
337 | 72902672 | Christophe Lyon | |
338 | 72902672 | Christophe Lyon | res = src1 - src2; |
339 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
|
340 | 72902672 | Christophe Lyon | SET_QC(); |
341 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
342 | 72902672 | Christophe Lyon | } |
343 | 72902672 | Christophe Lyon | return res;
|
344 | 72902672 | Christophe Lyon | } |
345 | 72902672 | Christophe Lyon | |
346 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 |
347 | ad69471c | pbrook | NEON_VOP(hadd_s8, neon_s8, 4)
|
348 | ad69471c | pbrook | NEON_VOP(hadd_u8, neon_u8, 4)
|
349 | ad69471c | pbrook | NEON_VOP(hadd_s16, neon_s16, 2)
|
350 | ad69471c | pbrook | NEON_VOP(hadd_u16, neon_u16, 2)
|
351 | ad69471c | pbrook | #undef NEON_FN
|
352 | ad69471c | pbrook | |
353 | ad69471c | pbrook | int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) |
354 | ad69471c | pbrook | { |
355 | ad69471c | pbrook | int32_t dest; |
356 | ad69471c | pbrook | |
357 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
358 | ad69471c | pbrook | if (src1 & src2 & 1) |
359 | ad69471c | pbrook | dest++; |
360 | ad69471c | pbrook | return dest;
|
361 | ad69471c | pbrook | } |
362 | ad69471c | pbrook | |
363 | ad69471c | pbrook | uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) |
364 | ad69471c | pbrook | { |
365 | ad69471c | pbrook | uint32_t dest; |
366 | ad69471c | pbrook | |
367 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
368 | ad69471c | pbrook | if (src1 & src2 & 1) |
369 | ad69471c | pbrook | dest++; |
370 | ad69471c | pbrook | return dest;
|
371 | ad69471c | pbrook | } |
372 | ad69471c | pbrook | |
373 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 |
374 | ad69471c | pbrook | NEON_VOP(rhadd_s8, neon_s8, 4)
|
375 | ad69471c | pbrook | NEON_VOP(rhadd_u8, neon_u8, 4)
|
376 | ad69471c | pbrook | NEON_VOP(rhadd_s16, neon_s16, 2)
|
377 | ad69471c | pbrook | NEON_VOP(rhadd_u16, neon_u16, 2)
|
378 | ad69471c | pbrook | #undef NEON_FN
|
379 | ad69471c | pbrook | |
380 | ad69471c | pbrook | int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) |
381 | ad69471c | pbrook | { |
382 | ad69471c | pbrook | int32_t dest; |
383 | ad69471c | pbrook | |
384 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
385 | ad69471c | pbrook | if ((src1 | src2) & 1) |
386 | ad69471c | pbrook | dest++; |
387 | ad69471c | pbrook | return dest;
|
388 | ad69471c | pbrook | } |
389 | ad69471c | pbrook | |
390 | ad69471c | pbrook | uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) |
391 | ad69471c | pbrook | { |
392 | ad69471c | pbrook | uint32_t dest; |
393 | ad69471c | pbrook | |
394 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
395 | ad69471c | pbrook | if ((src1 | src2) & 1) |
396 | ad69471c | pbrook | dest++; |
397 | ad69471c | pbrook | return dest;
|
398 | ad69471c | pbrook | } |
399 | ad69471c | pbrook | |
400 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 |
401 | ad69471c | pbrook | NEON_VOP(hsub_s8, neon_s8, 4)
|
402 | ad69471c | pbrook | NEON_VOP(hsub_u8, neon_u8, 4)
|
403 | ad69471c | pbrook | NEON_VOP(hsub_s16, neon_s16, 2)
|
404 | ad69471c | pbrook | NEON_VOP(hsub_u16, neon_u16, 2)
|
405 | ad69471c | pbrook | #undef NEON_FN
|
406 | ad69471c | pbrook | |
407 | ad69471c | pbrook | int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) |
408 | ad69471c | pbrook | { |
409 | ad69471c | pbrook | int32_t dest; |
410 | ad69471c | pbrook | |
411 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
412 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
413 | ad69471c | pbrook | dest--; |
414 | ad69471c | pbrook | return dest;
|
415 | ad69471c | pbrook | } |
416 | ad69471c | pbrook | |
417 | ad69471c | pbrook | uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) |
418 | ad69471c | pbrook | { |
419 | ad69471c | pbrook | uint32_t dest; |
420 | ad69471c | pbrook | |
421 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
422 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
423 | ad69471c | pbrook | dest--; |
424 | ad69471c | pbrook | return dest;
|
425 | ad69471c | pbrook | } |
426 | ad69471c | pbrook | |
427 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 |
428 | ad69471c | pbrook | NEON_VOP(cgt_s8, neon_s8, 4)
|
429 | ad69471c | pbrook | NEON_VOP(cgt_u8, neon_u8, 4)
|
430 | ad69471c | pbrook | NEON_VOP(cgt_s16, neon_s16, 2)
|
431 | ad69471c | pbrook | NEON_VOP(cgt_u16, neon_u16, 2)
|
432 | ad69471c | pbrook | NEON_VOP(cgt_s32, neon_s32, 1)
|
433 | ad69471c | pbrook | NEON_VOP(cgt_u32, neon_u32, 1)
|
434 | ad69471c | pbrook | #undef NEON_FN
|
435 | ad69471c | pbrook | |
436 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 |
437 | ad69471c | pbrook | NEON_VOP(cge_s8, neon_s8, 4)
|
438 | ad69471c | pbrook | NEON_VOP(cge_u8, neon_u8, 4)
|
439 | ad69471c | pbrook | NEON_VOP(cge_s16, neon_s16, 2)
|
440 | ad69471c | pbrook | NEON_VOP(cge_u16, neon_u16, 2)
|
441 | ad69471c | pbrook | NEON_VOP(cge_s32, neon_s32, 1)
|
442 | ad69471c | pbrook | NEON_VOP(cge_u32, neon_u32, 1)
|
443 | ad69471c | pbrook | #undef NEON_FN
|
444 | ad69471c | pbrook | |
445 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
|
446 | ad69471c | pbrook | NEON_VOP(min_s8, neon_s8, 4)
|
447 | ad69471c | pbrook | NEON_VOP(min_u8, neon_u8, 4)
|
448 | ad69471c | pbrook | NEON_VOP(min_s16, neon_s16, 2)
|
449 | ad69471c | pbrook | NEON_VOP(min_u16, neon_u16, 2)
|
450 | ad69471c | pbrook | NEON_VOP(min_s32, neon_s32, 1)
|
451 | ad69471c | pbrook | NEON_VOP(min_u32, neon_u32, 1)
|
452 | ad69471c | pbrook | NEON_POP(pmin_s8, neon_s8, 4)
|
453 | ad69471c | pbrook | NEON_POP(pmin_u8, neon_u8, 4)
|
454 | ad69471c | pbrook | NEON_POP(pmin_s16, neon_s16, 2)
|
455 | ad69471c | pbrook | NEON_POP(pmin_u16, neon_u16, 2)
|
456 | ad69471c | pbrook | #undef NEON_FN
|
457 | ad69471c | pbrook | |
458 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
|
459 | ad69471c | pbrook | NEON_VOP(max_s8, neon_s8, 4)
|
460 | ad69471c | pbrook | NEON_VOP(max_u8, neon_u8, 4)
|
461 | ad69471c | pbrook | NEON_VOP(max_s16, neon_s16, 2)
|
462 | ad69471c | pbrook | NEON_VOP(max_u16, neon_u16, 2)
|
463 | ad69471c | pbrook | NEON_VOP(max_s32, neon_s32, 1)
|
464 | ad69471c | pbrook | NEON_VOP(max_u32, neon_u32, 1)
|
465 | ad69471c | pbrook | NEON_POP(pmax_s8, neon_s8, 4)
|
466 | ad69471c | pbrook | NEON_POP(pmax_u8, neon_u8, 4)
|
467 | ad69471c | pbrook | NEON_POP(pmax_s16, neon_s16, 2)
|
468 | ad69471c | pbrook | NEON_POP(pmax_u16, neon_u16, 2)
|
469 | ad69471c | pbrook | #undef NEON_FN
|
470 | ad69471c | pbrook | |
471 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) \
|
472 | ad69471c | pbrook | dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) |
473 | ad69471c | pbrook | NEON_VOP(abd_s8, neon_s8, 4)
|
474 | ad69471c | pbrook | NEON_VOP(abd_u8, neon_u8, 4)
|
475 | ad69471c | pbrook | NEON_VOP(abd_s16, neon_s16, 2)
|
476 | ad69471c | pbrook | NEON_VOP(abd_u16, neon_u16, 2)
|
477 | ad69471c | pbrook | NEON_VOP(abd_s32, neon_s32, 1)
|
478 | ad69471c | pbrook | NEON_VOP(abd_u32, neon_u32, 1)
|
479 | ad69471c | pbrook | #undef NEON_FN
|
480 | ad69471c | pbrook | |
481 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
482 | ad69471c | pbrook | int8_t tmp; \ |
483 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
484 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
485 | 50f67e95 | Juha Riihimรคki | tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
486 | ad69471c | pbrook | dest = 0; \
|
487 | ad69471c | pbrook | } else if (tmp < 0) { \ |
488 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
489 | ad69471c | pbrook | } else { \
|
490 | ad69471c | pbrook | dest = src1 << tmp; \ |
491 | ad69471c | pbrook | }} while (0) |
492 | ad69471c | pbrook | NEON_VOP(shl_u8, neon_u8, 4)
|
493 | ad69471c | pbrook | NEON_VOP(shl_u16, neon_u16, 2)
|
494 | ad69471c | pbrook | NEON_VOP(shl_u32, neon_u32, 1)
|
495 | ad69471c | pbrook | #undef NEON_FN
|
496 | ad69471c | pbrook | |
497 | ad69471c | pbrook | uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) |
498 | ad69471c | pbrook | { |
499 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
500 | ad69471c | pbrook | if (shift >= 64 || shift <= -64) { |
501 | ad69471c | pbrook | val = 0;
|
502 | ad69471c | pbrook | } else if (shift < 0) { |
503 | ad69471c | pbrook | val >>= -shift; |
504 | ad69471c | pbrook | } else {
|
505 | ad69471c | pbrook | val <<= shift; |
506 | ad69471c | pbrook | } |
507 | ad69471c | pbrook | return val;
|
508 | ad69471c | pbrook | } |
509 | ad69471c | pbrook | |
510 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
511 | ad69471c | pbrook | int8_t tmp; \ |
512 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
513 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
514 | ad69471c | pbrook | dest = 0; \
|
515 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
516 | ad69471c | pbrook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
517 | ad69471c | pbrook | } else if (tmp < 0) { \ |
518 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
519 | ad69471c | pbrook | } else { \
|
520 | ad69471c | pbrook | dest = src1 << tmp; \ |
521 | ad69471c | pbrook | }} while (0) |
522 | ad69471c | pbrook | NEON_VOP(shl_s8, neon_s8, 4)
|
523 | ad69471c | pbrook | NEON_VOP(shl_s16, neon_s16, 2)
|
524 | ad69471c | pbrook | NEON_VOP(shl_s32, neon_s32, 1)
|
525 | ad69471c | pbrook | #undef NEON_FN
|
526 | ad69471c | pbrook | |
527 | ad69471c | pbrook | uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) |
528 | ad69471c | pbrook | { |
529 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
530 | ad69471c | pbrook | int64_t val = valop; |
531 | ad69471c | pbrook | if (shift >= 64) { |
532 | ad69471c | pbrook | val = 0;
|
533 | ad69471c | pbrook | } else if (shift <= -64) { |
534 | ad69471c | pbrook | val >>= 63;
|
535 | ad69471c | pbrook | } else if (shift < 0) { |
536 | ad69471c | pbrook | val >>= -shift; |
537 | ad69471c | pbrook | } else {
|
538 | ad69471c | pbrook | val <<= shift; |
539 | ad69471c | pbrook | } |
540 | ad69471c | pbrook | return val;
|
541 | ad69471c | pbrook | } |
542 | ad69471c | pbrook | |
543 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
544 | ad69471c | pbrook | int8_t tmp; \ |
545 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
546 | 0670a7b6 | Peter Maydell | if ((tmp >= (ssize_t)sizeof(src1) * 8) \ |
547 | 0670a7b6 | Peter Maydell | || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ |
548 | ad69471c | pbrook | dest = 0; \
|
549 | ad69471c | pbrook | } else if (tmp < 0) { \ |
550 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
551 | ad69471c | pbrook | } else { \
|
552 | ad69471c | pbrook | dest = src1 << tmp; \ |
553 | ad69471c | pbrook | }} while (0) |
554 | ad69471c | pbrook | NEON_VOP(rshl_s8, neon_s8, 4)
|
555 | ad69471c | pbrook | NEON_VOP(rshl_s16, neon_s16, 2)
|
556 | ad69471c | pbrook | #undef NEON_FN
|
557 | ad69471c | pbrook | |
558 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
559 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
560 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) |
561 | 4bd4ee07 | Christophe Lyon | { |
562 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
563 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
564 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
565 | 4bd4ee07 | Christophe Lyon | if ((shift >= 32) || (shift <= -32)) { |
566 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
567 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
568 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
569 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
570 | 4bd4ee07 | Christophe Lyon | } else {
|
571 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
572 | 4bd4ee07 | Christophe Lyon | } |
573 | 4bd4ee07 | Christophe Lyon | return dest;
|
574 | 4bd4ee07 | Christophe Lyon | } |
575 | 4bd4ee07 | Christophe Lyon | |
576 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
577 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
578 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
579 | ad69471c | pbrook | { |
580 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
581 | ad69471c | pbrook | int64_t val = valop; |
582 | 0670a7b6 | Peter Maydell | if ((shift >= 64) || (shift <= -64)) { |
583 | ad69471c | pbrook | val = 0;
|
584 | ad69471c | pbrook | } else if (shift < 0) { |
585 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
586 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
587 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
588 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
589 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
590 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000LL;
|
591 | 4bd4ee07 | Christophe Lyon | } else {
|
592 | 4bd4ee07 | Christophe Lyon | val++; |
593 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
594 | 4bd4ee07 | Christophe Lyon | } |
595 | ad69471c | pbrook | } else {
|
596 | ad69471c | pbrook | val <<= shift; |
597 | ad69471c | pbrook | } |
598 | ad69471c | pbrook | return val;
|
599 | ad69471c | pbrook | } |
600 | ad69471c | pbrook | |
601 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
602 | ad69471c | pbrook | int8_t tmp; \ |
603 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
604 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
605 | 50f67e95 | Juha Riihimรคki | tmp < -(ssize_t)sizeof(src1) * 8) { \ |
606 | ad69471c | pbrook | dest = 0; \
|
607 | 50f67e95 | Juha Riihimรคki | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
608 | b6c63b98 | Christophe Lyon | dest = src1 >> (-tmp - 1); \
|
609 | ad69471c | pbrook | } else if (tmp < 0) { \ |
610 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
611 | ad69471c | pbrook | } else { \
|
612 | ad69471c | pbrook | dest = src1 << tmp; \ |
613 | ad69471c | pbrook | }} while (0) |
614 | ad69471c | pbrook | NEON_VOP(rshl_u8, neon_u8, 4)
|
615 | ad69471c | pbrook | NEON_VOP(rshl_u16, neon_u16, 2)
|
616 | ad69471c | pbrook | #undef NEON_FN
|
617 | ad69471c | pbrook | |
618 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
619 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
620 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) |
621 | 4bd4ee07 | Christophe Lyon | { |
622 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
623 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
624 | 4bd4ee07 | Christophe Lyon | if (shift >= 32 || shift < -32) { |
625 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
626 | 4bd4ee07 | Christophe Lyon | } else if (shift == -32) { |
627 | 4bd4ee07 | Christophe Lyon | dest = val >> 31;
|
628 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
629 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
630 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
631 | 4bd4ee07 | Christophe Lyon | } else {
|
632 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
633 | 4bd4ee07 | Christophe Lyon | } |
634 | 4bd4ee07 | Christophe Lyon | return dest;
|
635 | 4bd4ee07 | Christophe Lyon | } |
636 | 4bd4ee07 | Christophe Lyon | |
637 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
638 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
639 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
640 | ad69471c | pbrook | { |
641 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
642 | 51e3930f | Christophe Lyon | if (shift >= 64 || shift < -64) { |
643 | ad69471c | pbrook | val = 0;
|
644 | ad69471c | pbrook | } else if (shift == -64) { |
645 | ad69471c | pbrook | /* Rounding a 1-bit result just preserves that bit. */
|
646 | ad69471c | pbrook | val >>= 63;
|
647 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
648 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
649 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
650 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
651 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
652 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
653 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
654 | 4bd4ee07 | Christophe Lyon | } else {
|
655 | 4bd4ee07 | Christophe Lyon | val++; |
656 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
657 | 4bd4ee07 | Christophe Lyon | } |
658 | ad69471c | pbrook | } else {
|
659 | ad69471c | pbrook | val <<= shift; |
660 | ad69471c | pbrook | } |
661 | ad69471c | pbrook | return val;
|
662 | ad69471c | pbrook | } |
663 | ad69471c | pbrook | |
664 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
665 | ad69471c | pbrook | int8_t tmp; \ |
666 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
667 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
668 | ad69471c | pbrook | if (src1) { \
|
669 | ad69471c | pbrook | SET_QC(); \ |
670 | ad69471c | pbrook | dest = ~0; \
|
671 | ad69471c | pbrook | } else { \
|
672 | ad69471c | pbrook | dest = 0; \
|
673 | ad69471c | pbrook | } \ |
674 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
675 | ad69471c | pbrook | dest = 0; \
|
676 | ad69471c | pbrook | } else if (tmp < 0) { \ |
677 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
678 | ad69471c | pbrook | } else { \
|
679 | ad69471c | pbrook | dest = src1 << tmp; \ |
680 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
681 | ad69471c | pbrook | SET_QC(); \ |
682 | ad69471c | pbrook | dest = ~0; \
|
683 | ad69471c | pbrook | } \ |
684 | ad69471c | pbrook | }} while (0) |
685 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u8, neon_u8, 4)
|
686 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u16, neon_u16, 2)
|
687 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u32, neon_u32, 1)
|
688 | ad69471c | pbrook | #undef NEON_FN
|
689 | ad69471c | pbrook | |
690 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
691 | ad69471c | pbrook | { |
692 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
693 | ad69471c | pbrook | if (shift >= 64) { |
694 | ad69471c | pbrook | if (val) {
|
695 | ad69471c | pbrook | val = ~(uint64_t)0;
|
696 | ad69471c | pbrook | SET_QC(); |
697 | ad69471c | pbrook | } |
698 | ad69471c | pbrook | } else if (shift <= -64) { |
699 | ad69471c | pbrook | val = 0;
|
700 | ad69471c | pbrook | } else if (shift < 0) { |
701 | ad69471c | pbrook | val >>= -shift; |
702 | ad69471c | pbrook | } else {
|
703 | ad69471c | pbrook | uint64_t tmp = val; |
704 | ad69471c | pbrook | val <<= shift; |
705 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
706 | ad69471c | pbrook | SET_QC(); |
707 | ad69471c | pbrook | val = ~(uint64_t)0;
|
708 | ad69471c | pbrook | } |
709 | ad69471c | pbrook | } |
710 | ad69471c | pbrook | return val;
|
711 | ad69471c | pbrook | } |
712 | ad69471c | pbrook | |
713 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
714 | ad69471c | pbrook | int8_t tmp; \ |
715 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
716 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
717 | a5d88f3e | Peter Maydell | if (src1) { \
|
718 | ad69471c | pbrook | SET_QC(); \ |
719 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
720 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
721 | a5d88f3e | Peter Maydell | dest--; \ |
722 | a5d88f3e | Peter Maydell | } \ |
723 | a5d88f3e | Peter Maydell | } else { \
|
724 | a5d88f3e | Peter Maydell | dest = src1; \ |
725 | a5d88f3e | Peter Maydell | } \ |
726 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
727 | ad69471c | pbrook | dest = src1 >> 31; \
|
728 | ad69471c | pbrook | } else if (tmp < 0) { \ |
729 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
730 | ad69471c | pbrook | } else { \
|
731 | ad69471c | pbrook | dest = src1 << tmp; \ |
732 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
733 | ad69471c | pbrook | SET_QC(); \ |
734 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
735 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
736 | a5d88f3e | Peter Maydell | dest--; \ |
737 | a5d88f3e | Peter Maydell | } \ |
738 | ad69471c | pbrook | } \ |
739 | ad69471c | pbrook | }} while (0) |
740 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s8, neon_s8, 4)
|
741 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s16, neon_s16, 2)
|
742 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s32, neon_s32, 1)
|
743 | ad69471c | pbrook | #undef NEON_FN
|
744 | ad69471c | pbrook | |
745 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
746 | ad69471c | pbrook | { |
747 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
748 | ad69471c | pbrook | int64_t val = valop; |
749 | ad69471c | pbrook | if (shift >= 64) { |
750 | ad69471c | pbrook | if (val) {
|
751 | ad69471c | pbrook | SET_QC(); |
752 | eb7a3d79 | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
753 | ad69471c | pbrook | } |
754 | 4c9b70ae | Juha Riihimรคki | } else if (shift <= -64) { |
755 | ad69471c | pbrook | val >>= 63;
|
756 | ad69471c | pbrook | } else if (shift < 0) { |
757 | ad69471c | pbrook | val >>= -shift; |
758 | ad69471c | pbrook | } else {
|
759 | ad69471c | pbrook | int64_t tmp = val; |
760 | ad69471c | pbrook | val <<= shift; |
761 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
762 | ad69471c | pbrook | SET_QC(); |
763 | ad69471c | pbrook | val = (tmp >> 63) ^ ~SIGNBIT64;
|
764 | ad69471c | pbrook | } |
765 | ad69471c | pbrook | } |
766 | ad69471c | pbrook | return val;
|
767 | ad69471c | pbrook | } |
768 | ad69471c | pbrook | |
769 | 4ca4502c | Juha Riihimรคki | #define NEON_FN(dest, src1, src2) do { \ |
770 | 4ca4502c | Juha Riihimรคki | if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ |
771 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
772 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
773 | 4ca4502c | Juha Riihimรคki | } else { \
|
774 | 4ca4502c | Juha Riihimรคki | int8_t tmp; \ |
775 | 4ca4502c | Juha Riihimรคki | tmp = (int8_t)src2; \ |
776 | 4ca4502c | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
777 | 4ca4502c | Juha Riihimรคki | if (src1) { \
|
778 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
779 | 4ca4502c | Juha Riihimรคki | dest = ~0; \
|
780 | 4ca4502c | Juha Riihimรคki | } else { \
|
781 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
782 | 4ca4502c | Juha Riihimรคki | } \ |
783 | 4ca4502c | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
784 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
785 | 4ca4502c | Juha Riihimรคki | } else if (tmp < 0) { \ |
786 | 4ca4502c | Juha Riihimรคki | dest = src1 >> -tmp; \ |
787 | 4ca4502c | Juha Riihimรคki | } else { \
|
788 | 4ca4502c | Juha Riihimรคki | dest = src1 << tmp; \ |
789 | 4ca4502c | Juha Riihimรคki | if ((dest >> tmp) != src1) { \
|
790 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
791 | 4ca4502c | Juha Riihimรคki | dest = ~0; \
|
792 | 4ca4502c | Juha Riihimรคki | } \ |
793 | 4ca4502c | Juha Riihimรคki | } \ |
794 | 4ca4502c | Juha Riihimรคki | }} while (0) |
795 | 4ca4502c | Juha Riihimรคki | NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
|
796 | 4ca4502c | Juha Riihimรคki | NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
|
797 | 4ca4502c | Juha Riihimรคki | #undef NEON_FN
|
798 | 4ca4502c | Juha Riihimรคki | |
799 | 4ca4502c | Juha Riihimรคki | uint32_t HELPER(neon_qshlu_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
800 | 4ca4502c | Juha Riihimรคki | { |
801 | 4ca4502c | Juha Riihimรคki | if ((int32_t)valop < 0) { |
802 | 4ca4502c | Juha Riihimรคki | SET_QC(); |
803 | 4ca4502c | Juha Riihimรคki | return 0; |
804 | 4ca4502c | Juha Riihimรคki | } |
805 | 4ca4502c | Juha Riihimรคki | return helper_neon_qshl_u32(env, valop, shiftop);
|
806 | 4ca4502c | Juha Riihimรคki | } |
807 | 4ca4502c | Juha Riihimรคki | |
808 | 4ca4502c | Juha Riihimรคki | uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
809 | 4ca4502c | Juha Riihimรคki | { |
810 | 4ca4502c | Juha Riihimรคki | if ((int64_t)valop < 0) { |
811 | 4ca4502c | Juha Riihimรคki | SET_QC(); |
812 | 4ca4502c | Juha Riihimรคki | return 0; |
813 | 4ca4502c | Juha Riihimรคki | } |
814 | 4ca4502c | Juha Riihimรคki | return helper_neon_qshl_u64(env, valop, shiftop);
|
815 | 4ca4502c | Juha Riihimรคki | } |
816 | ad69471c | pbrook | |
817 | ad69471c | pbrook | /* FIXME: This is wrong. */
|
818 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
819 | ad69471c | pbrook | int8_t tmp; \ |
820 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
821 | 33ebc293 | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
822 | 33ebc293 | Peter Maydell | if (src1) { \
|
823 | 33ebc293 | Peter Maydell | SET_QC(); \ |
824 | 33ebc293 | Peter Maydell | dest = ~0; \
|
825 | 33ebc293 | Peter Maydell | } else { \
|
826 | 33ebc293 | Peter Maydell | dest = 0; \
|
827 | 33ebc293 | Peter Maydell | } \ |
828 | 33ebc293 | Peter Maydell | } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ |
829 | 33ebc293 | Peter Maydell | dest = 0; \
|
830 | 33ebc293 | Peter Maydell | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
831 | 33ebc293 | Peter Maydell | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
832 | 33ebc293 | Peter Maydell | } else if (tmp < 0) { \ |
833 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
834 | ad69471c | pbrook | } else { \
|
835 | ad69471c | pbrook | dest = src1 << tmp; \ |
836 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
837 | ad69471c | pbrook | SET_QC(); \ |
838 | ad69471c | pbrook | dest = ~0; \
|
839 | ad69471c | pbrook | } \ |
840 | ad69471c | pbrook | }} while (0) |
841 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
|
842 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
|
843 | ad69471c | pbrook | #undef NEON_FN
|
844 | ad69471c | pbrook | |
845 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
846 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
847 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop) |
848 | 4bd4ee07 | Christophe Lyon | { |
849 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
850 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
851 | 33ebc293 | Peter Maydell | if (shift >= 32) { |
852 | 33ebc293 | Peter Maydell | if (val) {
|
853 | 33ebc293 | Peter Maydell | SET_QC(); |
854 | 33ebc293 | Peter Maydell | dest = ~0;
|
855 | 33ebc293 | Peter Maydell | } else {
|
856 | 33ebc293 | Peter Maydell | dest = 0;
|
857 | 33ebc293 | Peter Maydell | } |
858 | 33ebc293 | Peter Maydell | } else if (shift < -32) { |
859 | 33ebc293 | Peter Maydell | dest = 0;
|
860 | 33ebc293 | Peter Maydell | } else if (shift == -32) { |
861 | 33ebc293 | Peter Maydell | dest = val >> 31;
|
862 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
863 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
864 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
865 | 4bd4ee07 | Christophe Lyon | } else {
|
866 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
867 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
868 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
869 | 4bd4ee07 | Christophe Lyon | dest = ~0;
|
870 | 4bd4ee07 | Christophe Lyon | } |
871 | 4bd4ee07 | Christophe Lyon | } |
872 | 4bd4ee07 | Christophe Lyon | return dest;
|
873 | 4bd4ee07 | Christophe Lyon | } |
874 | 4bd4ee07 | Christophe Lyon | |
875 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
876 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
877 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
878 | ad69471c | pbrook | { |
879 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
880 | 33ebc293 | Peter Maydell | if (shift >= 64) { |
881 | 33ebc293 | Peter Maydell | if (val) {
|
882 | 33ebc293 | Peter Maydell | SET_QC(); |
883 | 33ebc293 | Peter Maydell | val = ~0;
|
884 | 33ebc293 | Peter Maydell | } |
885 | 33ebc293 | Peter Maydell | } else if (shift < -64) { |
886 | 33ebc293 | Peter Maydell | val = 0;
|
887 | 33ebc293 | Peter Maydell | } else if (shift == -64) { |
888 | 33ebc293 | Peter Maydell | val >>= 63;
|
889 | 33ebc293 | Peter Maydell | } else if (shift < 0) { |
890 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
891 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
892 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
893 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
894 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
895 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
896 | 4bd4ee07 | Christophe Lyon | } else {
|
897 | 4bd4ee07 | Christophe Lyon | val++; |
898 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
899 | 4bd4ee07 | Christophe Lyon | } |
900 | ad69471c | pbrook | } else { \
|
901 | ad69471c | pbrook | uint64_t tmp = val; |
902 | ad69471c | pbrook | val <<= shift; |
903 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
904 | ad69471c | pbrook | SET_QC(); |
905 | ad69471c | pbrook | val = ~0;
|
906 | ad69471c | pbrook | } |
907 | ad69471c | pbrook | } |
908 | ad69471c | pbrook | return val;
|
909 | ad69471c | pbrook | } |
910 | ad69471c | pbrook | |
911 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
912 | ad69471c | pbrook | int8_t tmp; \ |
913 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
914 | 7b6ecf5b | Peter Maydell | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
915 | 7b6ecf5b | Peter Maydell | if (src1) { \
|
916 | 7b6ecf5b | Peter Maydell | SET_QC(); \ |
917 | 7b6ecf5b | Peter Maydell | dest = (1 << (sizeof(src1) * 8 - 1)); \ |
918 | 7b6ecf5b | Peter Maydell | if (src1 > 0) { \ |
919 | 7b6ecf5b | Peter Maydell | dest--; \ |
920 | 7b6ecf5b | Peter Maydell | } \ |
921 | 7b6ecf5b | Peter Maydell | } else { \
|
922 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
923 | 7b6ecf5b | Peter Maydell | } \ |
924 | 7b6ecf5b | Peter Maydell | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
925 | 7b6ecf5b | Peter Maydell | dest = 0; \
|
926 | 7b6ecf5b | Peter Maydell | } else if (tmp < 0) { \ |
927 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
928 | ad69471c | pbrook | } else { \
|
929 | ad69471c | pbrook | dest = src1 << tmp; \ |
930 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
931 | ad69471c | pbrook | SET_QC(); \ |
932 | 960e623b | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
933 | 960e623b | Peter Maydell | if (src1 > 0) { \ |
934 | 960e623b | Peter Maydell | dest--; \ |
935 | 960e623b | Peter Maydell | } \ |
936 | ad69471c | pbrook | } \ |
937 | ad69471c | pbrook | }} while (0) |
938 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
|
939 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
|
940 | ad69471c | pbrook | #undef NEON_FN
|
941 | ad69471c | pbrook | |
942 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
943 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
944 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
945 | 4bd4ee07 | Christophe Lyon | { |
946 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
947 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
948 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
949 | 7b6ecf5b | Peter Maydell | if (shift >= 32) { |
950 | 7b6ecf5b | Peter Maydell | if (val) {
|
951 | 7b6ecf5b | Peter Maydell | SET_QC(); |
952 | 7b6ecf5b | Peter Maydell | dest = (val >> 31) ^ ~SIGNBIT;
|
953 | 7b6ecf5b | Peter Maydell | } else {
|
954 | 7b6ecf5b | Peter Maydell | dest = 0;
|
955 | 7b6ecf5b | Peter Maydell | } |
956 | 7b6ecf5b | Peter Maydell | } else if (shift <= -32) { |
957 | 7b6ecf5b | Peter Maydell | dest = 0;
|
958 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
959 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
960 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
961 | 4bd4ee07 | Christophe Lyon | } else {
|
962 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
963 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
964 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
965 | 4bd4ee07 | Christophe Lyon | dest = (val >> 31) ^ ~SIGNBIT;
|
966 | 4bd4ee07 | Christophe Lyon | } |
967 | 4bd4ee07 | Christophe Lyon | } |
968 | 4bd4ee07 | Christophe Lyon | return dest;
|
969 | 4bd4ee07 | Christophe Lyon | } |
970 | 4bd4ee07 | Christophe Lyon | |
971 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
972 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
973 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
974 | ad69471c | pbrook | { |
975 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
976 | ad69471c | pbrook | int64_t val = valop; |
977 | ad69471c | pbrook | |
978 | 7b6ecf5b | Peter Maydell | if (shift >= 64) { |
979 | 7b6ecf5b | Peter Maydell | if (val) {
|
980 | 7b6ecf5b | Peter Maydell | SET_QC(); |
981 | 7b6ecf5b | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
982 | 7b6ecf5b | Peter Maydell | } |
983 | 7b6ecf5b | Peter Maydell | } else if (shift <= -64) { |
984 | 7b6ecf5b | Peter Maydell | val = 0;
|
985 | 7b6ecf5b | Peter Maydell | } else if (shift < 0) { |
986 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
987 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
988 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
989 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
990 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
991 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000ULL;
|
992 | 4bd4ee07 | Christophe Lyon | } else {
|
993 | 4bd4ee07 | Christophe Lyon | val++; |
994 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
995 | 4bd4ee07 | Christophe Lyon | } |
996 | ad69471c | pbrook | } else {
|
997 | 4bd4ee07 | Christophe Lyon | int64_t tmp = val; |
998 | ad69471c | pbrook | val <<= shift; |
999 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
1000 | ad69471c | pbrook | SET_QC(); |
1001 | 4bd4ee07 | Christophe Lyon | val = (tmp >> 63) ^ ~SIGNBIT64;
|
1002 | ad69471c | pbrook | } |
1003 | ad69471c | pbrook | } |
1004 | ad69471c | pbrook | return val;
|
1005 | ad69471c | pbrook | } |
1006 | ad69471c | pbrook | |
1007 | ad69471c | pbrook | uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |
1008 | ad69471c | pbrook | { |
1009 | ad69471c | pbrook | uint32_t mask; |
1010 | ad69471c | pbrook | mask = (a ^ b) & 0x80808080u;
|
1011 | ad69471c | pbrook | a &= ~0x80808080u;
|
1012 | ad69471c | pbrook | b &= ~0x80808080u;
|
1013 | ad69471c | pbrook | return (a + b) ^ mask;
|
1014 | ad69471c | pbrook | } |
1015 | ad69471c | pbrook | |
1016 | ad69471c | pbrook | uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) |
1017 | ad69471c | pbrook | { |
1018 | ad69471c | pbrook | uint32_t mask; |
1019 | ad69471c | pbrook | mask = (a ^ b) & 0x80008000u;
|
1020 | ad69471c | pbrook | a &= ~0x80008000u;
|
1021 | ad69471c | pbrook | b &= ~0x80008000u;
|
1022 | ad69471c | pbrook | return (a + b) ^ mask;
|
1023 | ad69471c | pbrook | } |
1024 | ad69471c | pbrook | |
1025 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 + src2
|
1026 | ad69471c | pbrook | NEON_POP(padd_u8, neon_u8, 4)
|
1027 | ad69471c | pbrook | NEON_POP(padd_u16, neon_u16, 2)
|
1028 | ad69471c | pbrook | #undef NEON_FN
|
1029 | ad69471c | pbrook | |
1030 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 - src2
|
1031 | ad69471c | pbrook | NEON_VOP(sub_u8, neon_u8, 4)
|
1032 | ad69471c | pbrook | NEON_VOP(sub_u16, neon_u16, 2)
|
1033 | ad69471c | pbrook | #undef NEON_FN
|
1034 | ad69471c | pbrook | |
1035 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 * src2
|
1036 | ad69471c | pbrook | NEON_VOP(mul_u8, neon_u8, 4)
|
1037 | ad69471c | pbrook | NEON_VOP(mul_u16, neon_u16, 2)
|
1038 | ad69471c | pbrook | #undef NEON_FN
|
1039 | ad69471c | pbrook | |
1040 | 1654b2d6 | aurel32 | /* Polynomial multiplication is like integer multiplication except the
|
1041 | ad69471c | pbrook | partial products are XORed, not added. */
|
1042 | ad69471c | pbrook | uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) |
1043 | ad69471c | pbrook | { |
1044 | ad69471c | pbrook | uint32_t mask; |
1045 | ad69471c | pbrook | uint32_t result; |
1046 | ad69471c | pbrook | result = 0;
|
1047 | ad69471c | pbrook | while (op1) {
|
1048 | ad69471c | pbrook | mask = 0;
|
1049 | ad69471c | pbrook | if (op1 & 1) |
1050 | ad69471c | pbrook | mask |= 0xff;
|
1051 | ad69471c | pbrook | if (op1 & (1 << 8)) |
1052 | ad69471c | pbrook | mask |= (0xff << 8); |
1053 | ad69471c | pbrook | if (op1 & (1 << 16)) |
1054 | ad69471c | pbrook | mask |= (0xff << 16); |
1055 | ad69471c | pbrook | if (op1 & (1 << 24)) |
1056 | ad69471c | pbrook | mask |= (0xff << 24); |
1057 | ad69471c | pbrook | result ^= op2 & mask; |
1058 | ad69471c | pbrook | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1059 | ad69471c | pbrook | op2 = (op2 << 1) & 0xfefefefe; |
1060 | ad69471c | pbrook | } |
1061 | ad69471c | pbrook | return result;
|
1062 | ad69471c | pbrook | } |
1063 | ad69471c | pbrook | |
1064 | e5ca24cb | Peter Maydell | uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) |
1065 | e5ca24cb | Peter Maydell | { |
1066 | e5ca24cb | Peter Maydell | uint64_t result = 0;
|
1067 | e5ca24cb | Peter Maydell | uint64_t mask; |
1068 | e5ca24cb | Peter Maydell | uint64_t op2ex = op2; |
1069 | e5ca24cb | Peter Maydell | op2ex = (op2ex & 0xff) |
|
1070 | e5ca24cb | Peter Maydell | ((op2ex & 0xff00) << 8) | |
1071 | e5ca24cb | Peter Maydell | ((op2ex & 0xff0000) << 16) | |
1072 | e5ca24cb | Peter Maydell | ((op2ex & 0xff000000) << 24); |
1073 | e5ca24cb | Peter Maydell | while (op1) {
|
1074 | e5ca24cb | Peter Maydell | mask = 0;
|
1075 | e5ca24cb | Peter Maydell | if (op1 & 1) { |
1076 | e5ca24cb | Peter Maydell | mask |= 0xffff;
|
1077 | e5ca24cb | Peter Maydell | } |
1078 | e5ca24cb | Peter Maydell | if (op1 & (1 << 8)) { |
1079 | e5ca24cb | Peter Maydell | mask |= (0xffffU << 16); |
1080 | e5ca24cb | Peter Maydell | } |
1081 | e5ca24cb | Peter Maydell | if (op1 & (1 << 16)) { |
1082 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 32); |
1083 | e5ca24cb | Peter Maydell | } |
1084 | e5ca24cb | Peter Maydell | if (op1 & (1 << 24)) { |
1085 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 48); |
1086 | e5ca24cb | Peter Maydell | } |
1087 | e5ca24cb | Peter Maydell | result ^= op2ex & mask; |
1088 | e5ca24cb | Peter Maydell | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1089 | e5ca24cb | Peter Maydell | op2ex <<= 1;
|
1090 | e5ca24cb | Peter Maydell | } |
1091 | e5ca24cb | Peter Maydell | return result;
|
1092 | e5ca24cb | Peter Maydell | } |
1093 | e5ca24cb | Peter Maydell | |
1094 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 |
1095 | ad69471c | pbrook | NEON_VOP(tst_u8, neon_u8, 4)
|
1096 | ad69471c | pbrook | NEON_VOP(tst_u16, neon_u16, 2)
|
1097 | ad69471c | pbrook | NEON_VOP(tst_u32, neon_u32, 1)
|
1098 | ad69471c | pbrook | #undef NEON_FN
|
1099 | ad69471c | pbrook | |
1100 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 |
1101 | ad69471c | pbrook | NEON_VOP(ceq_u8, neon_u8, 4)
|
1102 | ad69471c | pbrook | NEON_VOP(ceq_u16, neon_u16, 2)
|
1103 | ad69471c | pbrook | NEON_VOP(ceq_u32, neon_u32, 1)
|
1104 | ad69471c | pbrook | #undef NEON_FN
|
1105 | ad69471c | pbrook | |
1106 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src |
1107 | ad69471c | pbrook | NEON_VOP1(abs_s8, neon_s8, 4)
|
1108 | ad69471c | pbrook | NEON_VOP1(abs_s16, neon_s16, 2)
|
1109 | ad69471c | pbrook | #undef NEON_FN
|
1110 | ad69471c | pbrook | |
/* Count Leading Sign/Zero Bits.  */

/*
 * Return the number of leading zero bits in the 8-bit value x.
 * The counter starts at 8 and drops by one for every right shift needed
 * to reduce x to zero, so an input of 0 yields 8.
 */
static inline int do_clz8(uint8_t x)
{
    int n;
    for (n = 8; x; n--) {
        x >>= 1;
    }
    return n;
}
1119 | ad69471c | pbrook | |
/*
 * Return the number of leading zero bits in the 16-bit value x.
 * The counter starts at 16 and drops by one for every right shift
 * needed to reduce x to zero, so an input of 0 yields 16.
 */
static inline int do_clz16(uint16_t x)
{
    int n;
    for (n = 16; x; n--) {
        x >>= 1;
    }
    return n;
}
1127 | ad69471c | pbrook | |
1128 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
|
1129 | ad69471c | pbrook | NEON_VOP1(clz_u8, neon_u8, 4)
|
1130 | ad69471c | pbrook | #undef NEON_FN
|
1131 | ad69471c | pbrook | |
1132 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
|
1133 | ad69471c | pbrook | NEON_VOP1(clz_u16, neon_u16, 2)
|
1134 | ad69471c | pbrook | #undef NEON_FN
|
1135 | ad69471c | pbrook | |
1136 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 |
1137 | ad69471c | pbrook | NEON_VOP1(cls_s8, neon_s8, 4)
|
1138 | ad69471c | pbrook | #undef NEON_FN
|
1139 | ad69471c | pbrook | |
1140 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 |
1141 | ad69471c | pbrook | NEON_VOP1(cls_s16, neon_s16, 2)
|
1142 | ad69471c | pbrook | #undef NEON_FN
|
1143 | ad69471c | pbrook | |
1144 | ad69471c | pbrook | uint32_t HELPER(neon_cls_s32)(uint32_t x) |
1145 | ad69471c | pbrook | { |
1146 | ad69471c | pbrook | int count;
|
1147 | ad69471c | pbrook | if ((int32_t)x < 0) |
1148 | ad69471c | pbrook | x = ~x; |
1149 | ad69471c | pbrook | for (count = 32; x; count--) |
1150 | ad69471c | pbrook | x = x >> 1;
|
1151 | ad69471c | pbrook | return count - 1; |
1152 | ad69471c | pbrook | } |
1153 | ad69471c | pbrook | |
1154 | ad69471c | pbrook | /* Bit count. */
|
1155 | ad69471c | pbrook | uint32_t HELPER(neon_cnt_u8)(uint32_t x) |
1156 | ad69471c | pbrook | { |
1157 | ad69471c | pbrook | x = (x & 0x55555555) + ((x >> 1) & 0x55555555); |
1158 | ad69471c | pbrook | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
1159 | ad69471c | pbrook | x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); |
1160 | ad69471c | pbrook | return x;
|
1161 | ad69471c | pbrook | } |
1162 | ad69471c | pbrook | |
1163 | ad69471c | pbrook | #define NEON_QDMULH16(dest, src1, src2, round) do { \ |
1164 | ad69471c | pbrook | uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ |
1165 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ |
1166 | ad69471c | pbrook | SET_QC(); \ |
1167 | ad69471c | pbrook | tmp = (tmp >> 31) ^ ~SIGNBIT; \
|
1168 | 46eece9d | Juha Riihimรคki | } else { \
|
1169 | 46eece9d | Juha Riihimรคki | tmp <<= 1; \
|
1170 | ad69471c | pbrook | } \ |
1171 | ad69471c | pbrook | if (round) { \
|
1172 | ad69471c | pbrook | int32_t old = tmp; \ |
1173 | ad69471c | pbrook | tmp += 1 << 15; \ |
1174 | ad69471c | pbrook | if ((int32_t)tmp < old) { \
|
1175 | ad69471c | pbrook | SET_QC(); \ |
1176 | ad69471c | pbrook | tmp = SIGNBIT - 1; \
|
1177 | ad69471c | pbrook | } \ |
1178 | ad69471c | pbrook | } \ |
1179 | ad69471c | pbrook | dest = tmp >> 16; \
|
1180 | ad69471c | pbrook | } while(0) |
1181 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) |
1182 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
|
1183 | ad69471c | pbrook | #undef NEON_FN
|
1184 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) |
1185 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
|
1186 | ad69471c | pbrook | #undef NEON_FN
|
1187 | ad69471c | pbrook | #undef NEON_QDMULH16
|
1188 | ad69471c | pbrook | |
1189 | ad69471c | pbrook | #define NEON_QDMULH32(dest, src1, src2, round) do { \ |
1190 | ad69471c | pbrook | uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ |
1191 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ |
1192 | ad69471c | pbrook | SET_QC(); \ |
1193 | ad69471c | pbrook | tmp = (tmp >> 63) ^ ~SIGNBIT64; \
|
1194 | ad69471c | pbrook | } else { \
|
1195 | ad69471c | pbrook | tmp <<= 1; \
|
1196 | ad69471c | pbrook | } \ |
1197 | ad69471c | pbrook | if (round) { \
|
1198 | ad69471c | pbrook | int64_t old = tmp; \ |
1199 | ad69471c | pbrook | tmp += (int64_t)1 << 31; \ |
1200 | ad69471c | pbrook | if ((int64_t)tmp < old) { \
|
1201 | ad69471c | pbrook | SET_QC(); \ |
1202 | ad69471c | pbrook | tmp = SIGNBIT64 - 1; \
|
1203 | ad69471c | pbrook | } \ |
1204 | ad69471c | pbrook | } \ |
1205 | ad69471c | pbrook | dest = tmp >> 32; \
|
1206 | ad69471c | pbrook | } while(0) |
1207 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) |
1208 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
|
1209 | ad69471c | pbrook | #undef NEON_FN
|
1210 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) |
1211 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
|
1212 | ad69471c | pbrook | #undef NEON_FN
|
1213 | ad69471c | pbrook | #undef NEON_QDMULH32
|
1214 | ad69471c | pbrook | |
1215 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u8)(uint64_t x) |
1216 | ad69471c | pbrook | { |
1217 | ad69471c | pbrook | return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) |
1218 | ad69471c | pbrook | | ((x >> 24) & 0xff000000u); |
1219 | ad69471c | pbrook | } |
1220 | ad69471c | pbrook | |
1221 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u16)(uint64_t x) |
1222 | ad69471c | pbrook | { |
1223 | ad69471c | pbrook | return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); |
1224 | ad69471c | pbrook | } |
1225 | ad69471c | pbrook | |
1226 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) |
1227 | ad69471c | pbrook | { |
1228 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1229 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1230 | ad69471c | pbrook | } |
1231 | ad69471c | pbrook | |
1232 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) |
1233 | ad69471c | pbrook | { |
1234 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1235 | ad69471c | pbrook | } |
1236 | ad69471c | pbrook | |
1237 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) |
1238 | ad69471c | pbrook | { |
1239 | ad69471c | pbrook | x &= 0xff80ff80ff80ff80ull;
|
1240 | ad69471c | pbrook | x += 0x0080008000800080ull;
|
1241 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1242 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1243 | ad69471c | pbrook | } |
1244 | ad69471c | pbrook | |
1245 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) |
1246 | ad69471c | pbrook | { |
1247 | ad69471c | pbrook | x &= 0xffff8000ffff8000ull;
|
1248 | ad69471c | pbrook | x += 0x0000800000008000ull;
|
1249 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1250 | ad69471c | pbrook | } |
1251 | ad69471c | pbrook | |
1252 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x) |
1253 | af1bbf30 | Juha Riihimรคki | { |
1254 | af1bbf30 | Juha Riihimรคki | uint16_t s; |
1255 | af1bbf30 | Juha Riihimรคki | uint8_t d; |
1256 | af1bbf30 | Juha Riihimรคki | uint32_t res = 0;
|
1257 | af1bbf30 | Juha Riihimรคki | #define SAT8(n) \
|
1258 | af1bbf30 | Juha Riihimรคki | s = x >> n; \ |
1259 | af1bbf30 | Juha Riihimรคki | if (s & 0x8000) { \ |
1260 | af1bbf30 | Juha Riihimรคki | SET_QC(); \ |
1261 | af1bbf30 | Juha Riihimรคki | } else { \
|
1262 | af1bbf30 | Juha Riihimรคki | if (s > 0xff) { \ |
1263 | af1bbf30 | Juha Riihimรคki | d = 0xff; \
|
1264 | af1bbf30 | Juha Riihimรคki | SET_QC(); \ |
1265 | af1bbf30 | Juha Riihimรคki | } else { \
|
1266 | af1bbf30 | Juha Riihimรคki | d = s; \ |
1267 | af1bbf30 | Juha Riihimรคki | } \ |
1268 | af1bbf30 | Juha Riihimรคki | res |= (uint32_t)d << (n / 2); \
|
1269 | af1bbf30 | Juha Riihimรคki | } |
1270 | af1bbf30 | Juha Riihimรคki | |
1271 | af1bbf30 | Juha Riihimรคki | SAT8(0);
|
1272 | af1bbf30 | Juha Riihimรคki | SAT8(16);
|
1273 | af1bbf30 | Juha Riihimรคki | SAT8(32);
|
1274 | af1bbf30 | Juha Riihimรคki | SAT8(48);
|
1275 | af1bbf30 | Juha Riihimรคki | #undef SAT8
|
1276 | af1bbf30 | Juha Riihimรคki | return res;
|
1277 | af1bbf30 | Juha Riihimรคki | } |
1278 | af1bbf30 | Juha Riihimรคki | |
1279 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) |
1280 | ad69471c | pbrook | { |
1281 | ad69471c | pbrook | uint16_t s; |
1282 | ad69471c | pbrook | uint8_t d; |
1283 | ad69471c | pbrook | uint32_t res = 0;
|
1284 | ad69471c | pbrook | #define SAT8(n) \
|
1285 | ad69471c | pbrook | s = x >> n; \ |
1286 | ad69471c | pbrook | if (s > 0xff) { \ |
1287 | ad69471c | pbrook | d = 0xff; \
|
1288 | ad69471c | pbrook | SET_QC(); \ |
1289 | ad69471c | pbrook | } else { \
|
1290 | ad69471c | pbrook | d = s; \ |
1291 | ad69471c | pbrook | } \ |
1292 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1293 | ad69471c | pbrook | |
1294 | ad69471c | pbrook | SAT8(0);
|
1295 | ad69471c | pbrook | SAT8(16);
|
1296 | ad69471c | pbrook | SAT8(32);
|
1297 | ad69471c | pbrook | SAT8(48);
|
1298 | ad69471c | pbrook | #undef SAT8
|
1299 | ad69471c | pbrook | return res;
|
1300 | ad69471c | pbrook | } |
1301 | ad69471c | pbrook | |
1302 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) |
1303 | ad69471c | pbrook | { |
1304 | ad69471c | pbrook | int16_t s; |
1305 | ad69471c | pbrook | uint8_t d; |
1306 | ad69471c | pbrook | uint32_t res = 0;
|
1307 | ad69471c | pbrook | #define SAT8(n) \
|
1308 | ad69471c | pbrook | s = x >> n; \ |
1309 | ad69471c | pbrook | if (s != (int8_t)s) { \
|
1310 | ad69471c | pbrook | d = (s >> 15) ^ 0x7f; \ |
1311 | ad69471c | pbrook | SET_QC(); \ |
1312 | ad69471c | pbrook | } else { \
|
1313 | ad69471c | pbrook | d = s; \ |
1314 | ad69471c | pbrook | } \ |
1315 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1316 | ad69471c | pbrook | |
1317 | ad69471c | pbrook | SAT8(0);
|
1318 | ad69471c | pbrook | SAT8(16);
|
1319 | ad69471c | pbrook | SAT8(32);
|
1320 | ad69471c | pbrook | SAT8(48);
|
1321 | ad69471c | pbrook | #undef SAT8
|
1322 | ad69471c | pbrook | return res;
|
1323 | ad69471c | pbrook | } |
1324 | ad69471c | pbrook | |
1325 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x) |
1326 | af1bbf30 | Juha Riihimรคki | { |
1327 | af1bbf30 | Juha Riihimรคki | uint32_t high; |
1328 | af1bbf30 | Juha Riihimรคki | uint32_t low; |
1329 | af1bbf30 | Juha Riihimรคki | low = x; |
1330 | af1bbf30 | Juha Riihimรคki | if (low & 0x80000000) { |
1331 | af1bbf30 | Juha Riihimรคki | low = 0;
|
1332 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1333 | af1bbf30 | Juha Riihimรคki | } else if (low > 0xffff) { |
1334 | af1bbf30 | Juha Riihimรคki | low = 0xffff;
|
1335 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1336 | af1bbf30 | Juha Riihimรคki | } |
1337 | af1bbf30 | Juha Riihimรคki | high = x >> 32;
|
1338 | af1bbf30 | Juha Riihimรคki | if (high & 0x80000000) { |
1339 | af1bbf30 | Juha Riihimรคki | high = 0;
|
1340 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1341 | af1bbf30 | Juha Riihimรคki | } else if (high > 0xffff) { |
1342 | af1bbf30 | Juha Riihimรคki | high = 0xffff;
|
1343 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1344 | af1bbf30 | Juha Riihimรคki | } |
1345 | af1bbf30 | Juha Riihimรคki | return low | (high << 16); |
1346 | af1bbf30 | Juha Riihimรคki | } |
1347 | af1bbf30 | Juha Riihimรคki | |
1348 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) |
1349 | ad69471c | pbrook | { |
1350 | ad69471c | pbrook | uint32_t high; |
1351 | ad69471c | pbrook | uint32_t low; |
1352 | ad69471c | pbrook | low = x; |
1353 | ad69471c | pbrook | if (low > 0xffff) { |
1354 | ad69471c | pbrook | low = 0xffff;
|
1355 | ad69471c | pbrook | SET_QC(); |
1356 | ad69471c | pbrook | } |
1357 | ad69471c | pbrook | high = x >> 32;
|
1358 | ad69471c | pbrook | if (high > 0xffff) { |
1359 | ad69471c | pbrook | high = 0xffff;
|
1360 | ad69471c | pbrook | SET_QC(); |
1361 | ad69471c | pbrook | } |
1362 | ad69471c | pbrook | return low | (high << 16); |
1363 | ad69471c | pbrook | } |
1364 | ad69471c | pbrook | |
1365 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) |
1366 | ad69471c | pbrook | { |
1367 | ad69471c | pbrook | int32_t low; |
1368 | ad69471c | pbrook | int32_t high; |
1369 | ad69471c | pbrook | low = x; |
1370 | ad69471c | pbrook | if (low != (int16_t)low) {
|
1371 | ad69471c | pbrook | low = (low >> 31) ^ 0x7fff; |
1372 | ad69471c | pbrook | SET_QC(); |
1373 | ad69471c | pbrook | } |
1374 | ad69471c | pbrook | high = x >> 32;
|
1375 | ad69471c | pbrook | if (high != (int16_t)high) {
|
1376 | ad69471c | pbrook | high = (high >> 31) ^ 0x7fff; |
1377 | ad69471c | pbrook | SET_QC(); |
1378 | ad69471c | pbrook | } |
1379 | ad69471c | pbrook | return (uint16_t)low | (high << 16); |
1380 | ad69471c | pbrook | } |
1381 | ad69471c | pbrook | |
1382 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x) |
1383 | af1bbf30 | Juha Riihimรคki | { |
1384 | af1bbf30 | Juha Riihimรคki | if (x & 0x8000000000000000ull) { |
1385 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1386 | af1bbf30 | Juha Riihimรคki | return 0; |
1387 | af1bbf30 | Juha Riihimรคki | } |
1388 | af1bbf30 | Juha Riihimรคki | if (x > 0xffffffffu) { |
1389 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1390 | af1bbf30 | Juha Riihimรคki | return 0xffffffffu; |
1391 | af1bbf30 | Juha Riihimรคki | } |
1392 | af1bbf30 | Juha Riihimรคki | return x;
|
1393 | af1bbf30 | Juha Riihimรคki | } |
1394 | af1bbf30 | Juha Riihimรคki | |
1395 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) |
1396 | ad69471c | pbrook | { |
1397 | ad69471c | pbrook | if (x > 0xffffffffu) { |
1398 | ad69471c | pbrook | SET_QC(); |
1399 | ad69471c | pbrook | return 0xffffffffu; |
1400 | ad69471c | pbrook | } |
1401 | ad69471c | pbrook | return x;
|
1402 | ad69471c | pbrook | } |
1403 | ad69471c | pbrook | |
1404 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) |
1405 | ad69471c | pbrook | { |
1406 | ad69471c | pbrook | if ((int64_t)x != (int32_t)x) {
|
1407 | ad69471c | pbrook | SET_QC(); |
1408 | cc2212c2 | Peter Maydell | return ((int64_t)x >> 63) ^ 0x7fffffff; |
1409 | ad69471c | pbrook | } |
1410 | ad69471c | pbrook | return x;
|
1411 | ad69471c | pbrook | } |
1412 | ad69471c | pbrook | |
1413 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u8)(uint32_t x) |
1414 | ad69471c | pbrook | { |
1415 | ad69471c | pbrook | uint64_t tmp; |
1416 | ad69471c | pbrook | uint64_t ret; |
1417 | ad69471c | pbrook | ret = (uint8_t)x; |
1418 | ad69471c | pbrook | tmp = (uint8_t)(x >> 8);
|
1419 | ad69471c | pbrook | ret |= tmp << 16;
|
1420 | ad69471c | pbrook | tmp = (uint8_t)(x >> 16);
|
1421 | ad69471c | pbrook | ret |= tmp << 32;
|
1422 | ad69471c | pbrook | tmp = (uint8_t)(x >> 24);
|
1423 | ad69471c | pbrook | ret |= tmp << 48;
|
1424 | ad69471c | pbrook | return ret;
|
1425 | ad69471c | pbrook | } |
1426 | ad69471c | pbrook | |
1427 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s8)(uint32_t x) |
1428 | ad69471c | pbrook | { |
1429 | ad69471c | pbrook | uint64_t tmp; |
1430 | ad69471c | pbrook | uint64_t ret; |
1431 | ad69471c | pbrook | ret = (uint16_t)(int8_t)x; |
1432 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 8);
|
1433 | ad69471c | pbrook | ret |= tmp << 16;
|
1434 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 16);
|
1435 | ad69471c | pbrook | ret |= tmp << 32;
|
1436 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 24);
|
1437 | ad69471c | pbrook | ret |= tmp << 48;
|
1438 | ad69471c | pbrook | return ret;
|
1439 | ad69471c | pbrook | } |
1440 | ad69471c | pbrook | |
1441 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u16)(uint32_t x) |
1442 | ad69471c | pbrook | { |
1443 | ad69471c | pbrook | uint64_t high = (uint16_t)(x >> 16);
|
1444 | ad69471c | pbrook | return ((uint16_t)x) | (high << 32); |
1445 | ad69471c | pbrook | } |
1446 | ad69471c | pbrook | |
1447 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s16)(uint32_t x) |
1448 | ad69471c | pbrook | { |
1449 | ad69471c | pbrook | uint64_t high = (int16_t)(x >> 16);
|
1450 | ad69471c | pbrook | return ((uint32_t)(int16_t)x) | (high << 32); |
1451 | ad69471c | pbrook | } |
1452 | ad69471c | pbrook | |
1453 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) |
1454 | ad69471c | pbrook | { |
1455 | ad69471c | pbrook | uint64_t mask; |
1456 | ad69471c | pbrook | mask = (a ^ b) & 0x8000800080008000ull;
|
1457 | ad69471c | pbrook | a &= ~0x8000800080008000ull;
|
1458 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1459 | ad69471c | pbrook | return (a + b) ^ mask;
|
1460 | ad69471c | pbrook | } |
1461 | ad69471c | pbrook | |
1462 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) |
1463 | ad69471c | pbrook | { |
1464 | ad69471c | pbrook | uint64_t mask; |
1465 | ad69471c | pbrook | mask = (a ^ b) & 0x8000000080000000ull;
|
1466 | ad69471c | pbrook | a &= ~0x8000000080000000ull;
|
1467 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1468 | ad69471c | pbrook | return (a + b) ^ mask;
|
1469 | ad69471c | pbrook | } |
1470 | ad69471c | pbrook | |
1471 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) |
1472 | ad69471c | pbrook | { |
1473 | ad69471c | pbrook | uint64_t tmp; |
1474 | ad69471c | pbrook | uint64_t tmp2; |
1475 | ad69471c | pbrook | |
1476 | ad69471c | pbrook | tmp = a & 0x0000ffff0000ffffull;
|
1477 | ad69471c | pbrook | tmp += (a >> 16) & 0x0000ffff0000ffffull; |
1478 | ad69471c | pbrook | tmp2 = b & 0xffff0000ffff0000ull;
|
1479 | ad69471c | pbrook | tmp2 += (b << 16) & 0xffff0000ffff0000ull; |
1480 | ad69471c | pbrook | return ( tmp & 0xffff) |
1481 | ad69471c | pbrook | | ((tmp >> 16) & 0xffff0000ull) |
1482 | ad69471c | pbrook | | ((tmp2 << 16) & 0xffff00000000ull) |
1483 | ad69471c | pbrook | | ( tmp2 & 0xffff000000000000ull);
|
1484 | ad69471c | pbrook | } |
1485 | ad69471c | pbrook | |
1486 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) |
1487 | ad69471c | pbrook | { |
1488 | ad69471c | pbrook | uint32_t low = a + (a >> 32);
|
1489 | ad69471c | pbrook | uint32_t high = b + (b >> 32);
|
1490 | ad69471c | pbrook | return low + ((uint64_t)high << 32); |
1491 | ad69471c | pbrook | } |
1492 | ad69471c | pbrook | |
1493 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) |
1494 | ad69471c | pbrook | { |
1495 | ad69471c | pbrook | uint64_t mask; |
1496 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000800080008000ull;
|
1497 | ad69471c | pbrook | a |= 0x8000800080008000ull;
|
1498 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1499 | ad69471c | pbrook | return (a - b) ^ mask;
|
1500 | ad69471c | pbrook | } |
1501 | ad69471c | pbrook | |
1502 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) |
1503 | ad69471c | pbrook | { |
1504 | ad69471c | pbrook | uint64_t mask; |
1505 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000000080000000ull;
|
1506 | ad69471c | pbrook | a |= 0x8000000080000000ull;
|
1507 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1508 | ad69471c | pbrook | return (a - b) ^ mask;
|
1509 | ad69471c | pbrook | } |
1510 | ad69471c | pbrook | |
1511 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) |
1512 | ad69471c | pbrook | { |
1513 | ad69471c | pbrook | uint32_t x, y; |
1514 | ad69471c | pbrook | uint32_t low, high; |
1515 | ad69471c | pbrook | |
1516 | ad69471c | pbrook | x = a; |
1517 | ad69471c | pbrook | y = b; |
1518 | ad69471c | pbrook | low = x + y; |
1519 | ad69471c | pbrook | if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1520 | ad69471c | pbrook | SET_QC(); |
1521 | ad69471c | pbrook | low = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1522 | ad69471c | pbrook | } |
1523 | ad69471c | pbrook | x = a >> 32;
|
1524 | ad69471c | pbrook | y = b >> 32;
|
1525 | ad69471c | pbrook | high = x + y; |
1526 | ad69471c | pbrook | if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1527 | ad69471c | pbrook | SET_QC(); |
1528 | ad69471c | pbrook | high = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1529 | ad69471c | pbrook | } |
1530 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1531 | ad69471c | pbrook | } |
1532 | ad69471c | pbrook | |
1533 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) |
1534 | ad69471c | pbrook | { |
1535 | ad69471c | pbrook | uint64_t result; |
1536 | ad69471c | pbrook | |
1537 | ad69471c | pbrook | result = a + b; |
1538 | ad69471c | pbrook | if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
|
1539 | ad69471c | pbrook | SET_QC(); |
1540 | ad69471c | pbrook | result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
|
1541 | ad69471c | pbrook | } |
1542 | ad69471c | pbrook | return result;
|
1543 | ad69471c | pbrook | } |
1544 | ad69471c | pbrook | |
/* dest = |x - y|, with both operands first converted to 'type'. */
#define DO_ABD(dest, x, y, type) do { \
    type abd_lhs = x; \
    type abd_rhs = y; \
    if (abd_lhs > abd_rhs) { \
        dest = abd_lhs - abd_rhs; \
    } else { \
        dest = abd_rhs - abd_lhs; \
    } \
    } while(0)
1550 | ad69471c | pbrook | |
1551 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) |
1552 | ad69471c | pbrook | { |
1553 | ad69471c | pbrook | uint64_t tmp; |
1554 | ad69471c | pbrook | uint64_t result; |
1555 | ad69471c | pbrook | DO_ABD(result, a, b, uint8_t); |
1556 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, uint8_t); |
1557 | ad69471c | pbrook | result |= tmp << 16;
|
1558 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint8_t); |
1559 | ad69471c | pbrook | result |= tmp << 32;
|
1560 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, uint8_t); |
1561 | ad69471c | pbrook | result |= tmp << 48;
|
1562 | ad69471c | pbrook | return result;
|
1563 | ad69471c | pbrook | } |
1564 | ad69471c | pbrook | |
1565 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) |
1566 | ad69471c | pbrook | { |
1567 | ad69471c | pbrook | uint64_t tmp; |
1568 | ad69471c | pbrook | uint64_t result; |
1569 | ad69471c | pbrook | DO_ABD(result, a, b, int8_t); |
1570 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, int8_t); |
1571 | ad69471c | pbrook | result |= tmp << 16;
|
1572 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int8_t); |
1573 | ad69471c | pbrook | result |= tmp << 32;
|
1574 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, int8_t); |
1575 | ad69471c | pbrook | result |= tmp << 48;
|
1576 | ad69471c | pbrook | return result;
|
1577 | ad69471c | pbrook | } |
1578 | ad69471c | pbrook | |
1579 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) |
1580 | ad69471c | pbrook | { |
1581 | ad69471c | pbrook | uint64_t tmp; |
1582 | ad69471c | pbrook | uint64_t result; |
1583 | ad69471c | pbrook | DO_ABD(result, a, b, uint16_t); |
1584 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint16_t); |
1585 | ad69471c | pbrook | return result | (tmp << 32); |
1586 | ad69471c | pbrook | } |
1587 | ad69471c | pbrook | |
1588 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) |
1589 | ad69471c | pbrook | { |
1590 | ad69471c | pbrook | uint64_t tmp; |
1591 | ad69471c | pbrook | uint64_t result; |
1592 | ad69471c | pbrook | DO_ABD(result, a, b, int16_t); |
1593 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int16_t); |
1594 | ad69471c | pbrook | return result | (tmp << 32); |
1595 | ad69471c | pbrook | } |
1596 | ad69471c | pbrook | |
1597 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) |
1598 | ad69471c | pbrook | { |
1599 | ad69471c | pbrook | uint64_t result; |
1600 | ad69471c | pbrook | DO_ABD(result, a, b, uint32_t); |
1601 | ad69471c | pbrook | return result;
|
1602 | ad69471c | pbrook | } |
1603 | ad69471c | pbrook | |
1604 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) |
1605 | ad69471c | pbrook | { |
1606 | ad69471c | pbrook | uint64_t result; |
1607 | ad69471c | pbrook | DO_ABD(result, a, b, int32_t); |
1608 | ad69471c | pbrook | return result;
|
1609 | ad69471c | pbrook | } |
1610 | ad69471c | pbrook | #undef DO_ABD
|
1611 | ad69471c | pbrook | |
/* Widening multiply.  Named type is the source type.
 * type1 is the element type the operands are narrowed to (its signedness
 * selects sign- or zero-extension); type2 is the unsigned result type.
 * The leading "1u *" forces the multiplication into unsigned arithmetic:
 * with a 16-bit type2 the operands would otherwise be promoted to int, and
 * 0xffff * 0xffff overflows a signed int (undefined behaviour).
 */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 tmp_x = x; \
    type1 tmp_y = y; \
    dest = (type2)(1u * (type2)tmp_x * (type2)tmp_y); \
    } while(0)
1618 | ad69471c | pbrook | |
1619 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) |
1620 | ad69471c | pbrook | { |
1621 | ad69471c | pbrook | uint64_t tmp; |
1622 | ad69471c | pbrook | uint64_t result; |
1623 | ad69471c | pbrook | |
1624 | ad69471c | pbrook | DO_MULL(result, a, b, uint8_t, uint16_t); |
1625 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); |
1626 | ad69471c | pbrook | result |= tmp << 16;
|
1627 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); |
1628 | ad69471c | pbrook | result |= tmp << 32;
|
1629 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); |
1630 | ad69471c | pbrook | result |= tmp << 48;
|
1631 | ad69471c | pbrook | return result;
|
1632 | ad69471c | pbrook | } |
1633 | ad69471c | pbrook | |
1634 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) |
1635 | ad69471c | pbrook | { |
1636 | ad69471c | pbrook | uint64_t tmp; |
1637 | ad69471c | pbrook | uint64_t result; |
1638 | ad69471c | pbrook | |
1639 | ad69471c | pbrook | DO_MULL(result, a, b, int8_t, uint16_t); |
1640 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); |
1641 | ad69471c | pbrook | result |= tmp << 16;
|
1642 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); |
1643 | ad69471c | pbrook | result |= tmp << 32;
|
1644 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); |
1645 | ad69471c | pbrook | result |= tmp << 48;
|
1646 | ad69471c | pbrook | return result;
|
1647 | ad69471c | pbrook | } |
1648 | ad69471c | pbrook | |
1649 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) |
1650 | ad69471c | pbrook | { |
1651 | ad69471c | pbrook | uint64_t tmp; |
1652 | ad69471c | pbrook | uint64_t result; |
1653 | ad69471c | pbrook | |
1654 | ad69471c | pbrook | DO_MULL(result, a, b, uint16_t, uint32_t); |
1655 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1656 | ad69471c | pbrook | return result | (tmp << 32); |
1657 | ad69471c | pbrook | } |
1658 | ad69471c | pbrook | |
1659 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) |
1660 | ad69471c | pbrook | { |
1661 | ad69471c | pbrook | uint64_t tmp; |
1662 | ad69471c | pbrook | uint64_t result; |
1663 | ad69471c | pbrook | |
1664 | ad69471c | pbrook | DO_MULL(result, a, b, int16_t, uint32_t); |
1665 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); |
1666 | ad69471c | pbrook | return result | (tmp << 32); |
1667 | ad69471c | pbrook | } |
1668 | ad69471c | pbrook | |
1669 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u16)(uint64_t x) |
1670 | ad69471c | pbrook | { |
1671 | ad69471c | pbrook | uint16_t tmp; |
1672 | ad69471c | pbrook | uint64_t result; |
1673 | ad69471c | pbrook | result = (uint16_t)-x; |
1674 | ad69471c | pbrook | tmp = -(x >> 16);
|
1675 | ad69471c | pbrook | result |= (uint64_t)tmp << 16;
|
1676 | ad69471c | pbrook | tmp = -(x >> 32);
|
1677 | ad69471c | pbrook | result |= (uint64_t)tmp << 32;
|
1678 | ad69471c | pbrook | tmp = -(x >> 48);
|
1679 | ad69471c | pbrook | result |= (uint64_t)tmp << 48;
|
1680 | ad69471c | pbrook | return result;
|
1681 | ad69471c | pbrook | } |
1682 | ad69471c | pbrook | |
1683 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u32)(uint64_t x) |
1684 | ad69471c | pbrook | { |
1685 | ad69471c | pbrook | uint32_t low = -x; |
1686 | ad69471c | pbrook | uint32_t high = -(x >> 32);
|
1687 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1688 | ad69471c | pbrook | } |
1689 | ad69471c | pbrook | |
1690 | ad69471c | pbrook | /* FIXME: There should be a native op for this. */
|
1691 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u64)(uint64_t x) |
1692 | ad69471c | pbrook | { |
1693 | ad69471c | pbrook | return -x;
|
1694 | ad69471c | pbrook | } |
1695 | ad69471c | pbrook | |
1696 | ad69471c | pbrook | /* Saturating sign manipulation. */
|
1697 | ad69471c | pbrook | /* ??? Make these use NEON_VOP1 */
|
1698 | ad69471c | pbrook | #define DO_QABS8(x) do { \ |
1699 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1700 | ad69471c | pbrook | x = 0x7f; \
|
1701 | ad69471c | pbrook | SET_QC(); \ |
1702 | ad69471c | pbrook | } else if (x < 0) { \ |
1703 | ad69471c | pbrook | x = -x; \ |
1704 | ad69471c | pbrook | }} while (0) |
1705 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) |
1706 | ad69471c | pbrook | { |
1707 | ad69471c | pbrook | neon_s8 vec; |
1708 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1709 | ad69471c | pbrook | DO_QABS8(vec.v1); |
1710 | ad69471c | pbrook | DO_QABS8(vec.v2); |
1711 | ad69471c | pbrook | DO_QABS8(vec.v3); |
1712 | ad69471c | pbrook | DO_QABS8(vec.v4); |
1713 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1714 | ad69471c | pbrook | return x;
|
1715 | ad69471c | pbrook | } |
1716 | ad69471c | pbrook | #undef DO_QABS8
|
1717 | ad69471c | pbrook | |
1718 | ad69471c | pbrook | #define DO_QNEG8(x) do { \ |
1719 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1720 | ad69471c | pbrook | x = 0x7f; \
|
1721 | ad69471c | pbrook | SET_QC(); \ |
1722 | ad69471c | pbrook | } else { \
|
1723 | ad69471c | pbrook | x = -x; \ |
1724 | ad69471c | pbrook | }} while (0) |
1725 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) |
1726 | ad69471c | pbrook | { |
1727 | ad69471c | pbrook | neon_s8 vec; |
1728 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1729 | ad69471c | pbrook | DO_QNEG8(vec.v1); |
1730 | ad69471c | pbrook | DO_QNEG8(vec.v2); |
1731 | ad69471c | pbrook | DO_QNEG8(vec.v3); |
1732 | ad69471c | pbrook | DO_QNEG8(vec.v4); |
1733 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1734 | ad69471c | pbrook | return x;
|
1735 | ad69471c | pbrook | } |
1736 | ad69471c | pbrook | #undef DO_QNEG8
|
1737 | ad69471c | pbrook | |
1738 | ad69471c | pbrook | #define DO_QABS16(x) do { \ |
1739 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1740 | ad69471c | pbrook | x = 0x7fff; \
|
1741 | ad69471c | pbrook | SET_QC(); \ |
1742 | ad69471c | pbrook | } else if (x < 0) { \ |
1743 | ad69471c | pbrook | x = -x; \ |
1744 | ad69471c | pbrook | }} while (0) |
1745 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) |
1746 | ad69471c | pbrook | { |
1747 | ad69471c | pbrook | neon_s16 vec; |
1748 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1749 | ad69471c | pbrook | DO_QABS16(vec.v1); |
1750 | ad69471c | pbrook | DO_QABS16(vec.v2); |
1751 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1752 | ad69471c | pbrook | return x;
|
1753 | ad69471c | pbrook | } |
1754 | ad69471c | pbrook | #undef DO_QABS16
|
1755 | ad69471c | pbrook | |
1756 | ad69471c | pbrook | #define DO_QNEG16(x) do { \ |
1757 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1758 | ad69471c | pbrook | x = 0x7fff; \
|
1759 | ad69471c | pbrook | SET_QC(); \ |
1760 | ad69471c | pbrook | } else { \
|
1761 | ad69471c | pbrook | x = -x; \ |
1762 | ad69471c | pbrook | }} while (0) |
1763 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) |
1764 | ad69471c | pbrook | { |
1765 | ad69471c | pbrook | neon_s16 vec; |
1766 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1767 | ad69471c | pbrook | DO_QNEG16(vec.v1); |
1768 | ad69471c | pbrook | DO_QNEG16(vec.v2); |
1769 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1770 | ad69471c | pbrook | return x;
|
1771 | ad69471c | pbrook | } |
1772 | ad69471c | pbrook | #undef DO_QNEG16
|
1773 | ad69471c | pbrook | |
1774 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) |
1775 | ad69471c | pbrook | { |
1776 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1777 | ad69471c | pbrook | SET_QC(); |
1778 | ad69471c | pbrook | x = ~SIGNBIT; |
1779 | ad69471c | pbrook | } else if ((int32_t)x < 0) { |
1780 | ad69471c | pbrook | x = -x; |
1781 | ad69471c | pbrook | } |
1782 | ad69471c | pbrook | return x;
|
1783 | ad69471c | pbrook | } |
1784 | ad69471c | pbrook | |
1785 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) |
1786 | ad69471c | pbrook | { |
1787 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1788 | ad69471c | pbrook | SET_QC(); |
1789 | ad69471c | pbrook | x = ~SIGNBIT; |
1790 | ad69471c | pbrook | } else {
|
1791 | ad69471c | pbrook | x = -x; |
1792 | ad69471c | pbrook | } |
1793 | ad69471c | pbrook | return x;
|
1794 | ad69471c | pbrook | } |
1795 | ad69471c | pbrook | |
1796 | ad69471c | pbrook | /* NEON Float helpers. */
|
1797 | ad69471c | pbrook | uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) |
1798 | ad69471c | pbrook | { |
1799 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1800 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1801 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; |
1802 | ad69471c | pbrook | } |
1803 | ad69471c | pbrook | |
1804 | ad69471c | pbrook | uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) |
1805 | ad69471c | pbrook | { |
1806 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1807 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1808 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; |
1809 | ad69471c | pbrook | } |
1810 | ad69471c | pbrook | |
1811 | ad69471c | pbrook | uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) |
1812 | ad69471c | pbrook | { |
1813 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1814 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1815 | ad69471c | pbrook | return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) |
1816 | ad69471c | pbrook | ? float32_sub(f0, f1, NFS) |
1817 | ad69471c | pbrook | : float32_sub(f1, f0, NFS)); |
1818 | ad69471c | pbrook | } |
1819 | ad69471c | pbrook | |
1820 | ad69471c | pbrook | uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) |
1821 | ad69471c | pbrook | { |
1822 | ad69471c | pbrook | return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
|
1823 | ad69471c | pbrook | } |
1824 | ad69471c | pbrook | |
1825 | ad69471c | pbrook | uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) |
1826 | ad69471c | pbrook | { |
1827 | ad69471c | pbrook | return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
|
1828 | ad69471c | pbrook | } |
1829 | ad69471c | pbrook | |
1830 | ad69471c | pbrook | uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) |
1831 | ad69471c | pbrook | { |
1832 | ad69471c | pbrook | return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
|
1833 | ad69471c | pbrook | } |
1834 | ad69471c | pbrook | |
1835 | ad69471c | pbrook | /* Floating point comparisons produce an integer result. */
|
1836 | ad69471c | pbrook | #define NEON_VOP_FCMP(name, cmp) \
|
1837 | ad69471c | pbrook | uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ |
1838 | ad69471c | pbrook | { \ |
1839 | ad69471c | pbrook | if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ |
1840 | ad69471c | pbrook | return ~0; \ |
1841 | ad69471c | pbrook | else \
|
1842 | ad69471c | pbrook | return 0; \ |
1843 | ad69471c | pbrook | } |
1844 | ad69471c | pbrook | |
1845 | ad69471c | pbrook | NEON_VOP_FCMP(ceq_f32, ==) |
1846 | ad69471c | pbrook | NEON_VOP_FCMP(cge_f32, >=) |
1847 | ad69471c | pbrook | NEON_VOP_FCMP(cgt_f32, >) |
1848 | ad69471c | pbrook | |
1849 | ad69471c | pbrook | uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) |
1850 | ad69471c | pbrook | { |
1851 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1852 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1853 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; |
1854 | ad69471c | pbrook | } |
1855 | ad69471c | pbrook | |
1856 | ad69471c | pbrook | uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) |
1857 | ad69471c | pbrook | { |
1858 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1859 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1860 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; |
1861 | ad69471c | pbrook | } |
1862 | 02acedf9 | Peter Maydell | |
/* Extract element N, SIZE bits wide, from the packed vector value V. */
#define ELEM(V, N, SIZE) (((1ull << (SIZE)) - 1) & ((V) >> ((N) * (SIZE))))
1864 | 02acedf9 | Peter Maydell | |
1865 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1866 | 02acedf9 | Peter Maydell | { |
1867 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1868 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1869 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1870 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1871 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) |
1872 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) |
1873 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) |
1874 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); |
1875 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) |
1876 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) |
1877 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1878 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); |
1879 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) |
1880 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) |
1881 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) |
1882 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); |
1883 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) |
1884 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) |
1885 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) |
1886 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1887 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1888 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1889 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1890 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1891 | 02acedf9 | Peter Maydell | } |
1892 | 02acedf9 | Peter Maydell | |
1893 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1894 | 02acedf9 | Peter Maydell | { |
1895 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1896 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1897 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1898 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1899 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) |
1900 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); |
1901 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) |
1902 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); |
1903 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) |
1904 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); |
1905 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) |
1906 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1907 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1908 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1909 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1910 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1911 | 02acedf9 | Peter Maydell | } |
1912 | 02acedf9 | Peter Maydell | |
1913 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
1914 | 02acedf9 | Peter Maydell | { |
1915 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1916 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1917 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1918 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1919 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); |
1920 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1921 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); |
1922 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1923 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1924 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1925 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1926 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1927 | 02acedf9 | Peter Maydell | } |
1928 | 02acedf9 | Peter Maydell | |
1929 | 02acedf9 | Peter Maydell | void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1930 | 02acedf9 | Peter Maydell | { |
1931 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1932 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1933 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) |
1934 | 02acedf9 | Peter Maydell | | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) |
1935 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1936 | 02acedf9 | Peter Maydell | | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); |
1937 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) |
1938 | 02acedf9 | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) |
1939 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) |
1940 | 02acedf9 | Peter Maydell | | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
1941 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1942 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1943 | 02acedf9 | Peter Maydell | } |
1944 | 02acedf9 | Peter Maydell | |
1945 | 02acedf9 | Peter Maydell | void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1946 | 02acedf9 | Peter Maydell | { |
1947 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1948 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1949 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) |
1950 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); |
1951 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) |
1952 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
1953 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1954 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1955 | 02acedf9 | Peter Maydell | } |
1956 | d68a6f3a | Peter Maydell | |
1957 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1958 | d68a6f3a | Peter Maydell | { |
1959 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1960 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1961 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1962 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1963 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) |
1964 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) |
1965 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) |
1966 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); |
1967 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) |
1968 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) |
1969 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) |
1970 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); |
1971 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) |
1972 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) |
1973 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1974 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); |
1975 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) |
1976 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) |
1977 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) |
1978 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1979 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1980 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1981 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1982 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1983 | d68a6f3a | Peter Maydell | } |
1984 | d68a6f3a | Peter Maydell | |
1985 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1986 | d68a6f3a | Peter Maydell | { |
1987 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1988 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1989 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1990 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1991 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) |
1992 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); |
1993 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) |
1994 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); |
1995 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) |
1996 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); |
1997 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) |
1998 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1999 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2000 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
2001 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2002 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
2003 | d68a6f3a | Peter Maydell | } |
2004 | d68a6f3a | Peter Maydell | |
2005 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
2006 | d68a6f3a | Peter Maydell | { |
2007 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
2008 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
2009 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
2010 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
2011 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); |
2012 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); |
2013 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
2014 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
2015 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2016 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
2017 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2018 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
2019 | d68a6f3a | Peter Maydell | } |
2020 | d68a6f3a | Peter Maydell | |
2021 | d68a6f3a | Peter Maydell | void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
2022 | d68a6f3a | Peter Maydell | { |
2023 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
2024 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
2025 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) |
2026 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) |
2027 | d68a6f3a | Peter Maydell | | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
2028 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); |
2029 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) |
2030 | d68a6f3a | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) |
2031 | d68a6f3a | Peter Maydell | | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) |
2032 | d68a6f3a | Peter Maydell | | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
2033 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2034 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2035 | d68a6f3a | Peter Maydell | } |
2036 | d68a6f3a | Peter Maydell | |
2037 | d68a6f3a | Peter Maydell | void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
2038 | d68a6f3a | Peter Maydell | { |
2039 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
2040 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
2041 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) |
2042 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); |
2043 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) |
2044 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
2045 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
2046 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
2047 | d68a6f3a | Peter Maydell | } |