root / target-arm / neon_helper.c @ 4bd4ee07
History | View | Annotate | Download (51.5 kB)
1 | e677137d | pbrook | /*
|
---|---|---|---|
2 | e677137d | pbrook | * ARM NEON vector operations.
|
3 | e677137d | pbrook | *
|
4 | e677137d | pbrook | * Copyright (c) 2007, 2008 CodeSourcery.
|
5 | e677137d | pbrook | * Written by Paul Brook
|
6 | e677137d | pbrook | *
|
7 | e677137d | pbrook | * This code is licenced under the GNU GPL v2.
|
8 | e677137d | pbrook | */
|
9 | ad69471c | pbrook | #include <stdlib.h> |
10 | ad69471c | pbrook | #include <stdio.h> |
11 | ad69471c | pbrook | |
12 | ad69471c | pbrook | #include "cpu.h" |
13 | ad69471c | pbrook | #include "exec-all.h" |
14 | ad69471c | pbrook | #include "helpers.h" |
15 | ad69471c | pbrook | |
/* Sign-bit masks for 32-bit and 64-bit values. */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/*
 * Record that a saturating operation saturated.
 *
 * CPSR_Q is OR-ed into the FPSCR slot of env->vfp.xregs so that every other
 * bit already stored there is preserved; a plain assignment would wipe the
 * whole register.  Expects a variable named `env` in the caller's scope.
 */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
20 | ad69471c | pbrook | |
21 | ad69471c | pbrook | static float_status neon_float_status;
|
22 | ad69471c | pbrook | #define NFS &neon_float_status
|
23 | ad69471c | pbrook | |
24 | ad69471c | pbrook | /* Helper routines to perform bitwise copies between float and int. */
|
25 | ad69471c | pbrook | static inline float32 vfp_itos(uint32_t i) |
26 | ad69471c | pbrook | { |
27 | ad69471c | pbrook | union {
|
28 | ad69471c | pbrook | uint32_t i; |
29 | ad69471c | pbrook | float32 s; |
30 | ad69471c | pbrook | } v; |
31 | ad69471c | pbrook | |
32 | ad69471c | pbrook | v.i = i; |
33 | ad69471c | pbrook | return v.s;
|
34 | ad69471c | pbrook | } |
35 | ad69471c | pbrook | |
36 | ad69471c | pbrook | static inline uint32_t vfp_stoi(float32 s) |
37 | ad69471c | pbrook | { |
38 | ad69471c | pbrook | union {
|
39 | ad69471c | pbrook | uint32_t i; |
40 | ad69471c | pbrook | float32 s; |
41 | ad69471c | pbrook | } v; |
42 | ad69471c | pbrook | |
43 | ad69471c | pbrook | v.s = s; |
44 | ad69471c | pbrook | return v.i;
|
45 | ad69471c | pbrook | } |
46 | ad69471c | pbrook | |
/*
 * Structure views of one 32-bit word as 1, 2 or 4 packed lanes.
 *
 * The member order is reversed on big-endian hosts, so v1 occupies the first
 * bytes of the struct on little-endian hosts and the last bytes on
 * big-endian ones.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define NEON_LANES2(type) type v2; type v1;
#define NEON_LANES4(type) type v4; type v3; type v2; type v1;
#else
#define NEON_LANES2(type) type v1; type v2;
#define NEON_LANES4(type) type v1; type v2; type v3; type v4;
#endif

#define NEON_TYPE1(name, type) \
typedef struct \
{ \
    type v1; \
} neon_##name;
#define NEON_TYPE2(name, type) \
typedef struct \
{ \
    NEON_LANES2(type) \
} neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct \
{ \
    NEON_LANES4(type) \
} neon_##name;

NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)

#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
#undef NEON_LANES4
#undef NEON_LANES2
93 | ad69471c | pbrook | |
/* Reinterpret the 32-bit word `val` as a `vtype` lane structure and assign
   it to `dest`.  The bytes are copied unchanged through a union. */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun_; \
    pun_.word = (val); \
    dest = pun_.lanes; \
} while(0)
103 | ad69471c | pbrook | |
/* Reinterpret the `vtype` lane structure `val` as a 32-bit word and assign
   it to `dest`.  The bytes are copied unchanged through a union. */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun_; \
    pun_.lanes = (val); \
    dest = pun_.word; \
} while(0)
113 | ad69471c | pbrook | |
/*
 * Apply the currently defined NEON_FN lane by lane to the locals vsrc1 and
 * vsrc2, storing into vdest.  Each wider variant extends the narrower one
 * with the additional lanes.
 */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
124 | ad69471c | pbrook | |
/*
 * Function body shared by the two-operand lane-wise helpers: unpack arg1 and
 * arg2 into `vtype` structures, run NEON_DO<n> over them, and return the
 * repacked result word.
 */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
137 | ad69471c | pbrook | |
/* Define helper neon_<name>(arg1, arg2) operating lane-wise on `n` lanes
   of type `vtype`. */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
    NEON_VOP_BODY(vtype, n)

/* Same as NEON_VOP, but the helper also receives `env`, which makes SET_QC()
   usable from inside NEON_FN. */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
    NEON_VOP_BODY(vtype, n)
145 | ad69471c | pbrook | |
/* Pairwise operations. */
/* For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same. */

/*
 * Apply NEON_FN to adjacent lane pairs: the pairs taken from vsrc1 fill the
 * low-numbered lanes of vdest and the pairs from vsrc2 fill the rest.
 * (NEON_PDO4 no longer ends in a line continuation, so it cannot absorb the
 * line that follows it.)
 */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);

/* Define helper neon_<name>(arg1, arg2) applying NEON_FN pairwise
   (NEON_PDO<n>) to the lanes of the two operand words. */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
171 | ad69471c | pbrook | |
/* Unary operators. */
/* Define helper neon_<name>(arg).  No vsrc2 is declared here, so the NEON_FN
   in effect must not evaluate its third argument. */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1, vdest; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, arg, vdest); \
    return arg; \
}
183 | ad69471c | pbrook | |
184 | ad69471c | pbrook | |
185 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
186 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
187 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
188 | ad69471c | pbrook | SET_QC(); \ |
189 | ad69471c | pbrook | dest = ~0; \
|
190 | ad69471c | pbrook | } else { \
|
191 | ad69471c | pbrook | dest = tmp; \ |
192 | ad69471c | pbrook | }} while(0) |
193 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
194 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u8, neon_u8, 4)
|
195 | ad69471c | pbrook | #undef NEON_FN
|
196 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
197 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u16, neon_u16, 2)
|
198 | ad69471c | pbrook | #undef NEON_FN
|
199 | ad69471c | pbrook | #undef NEON_USAT
|
200 | ad69471c | pbrook | |
201 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b) |
202 | 72902672 | Christophe Lyon | { |
203 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
204 | 72902672 | Christophe Lyon | if (res < a) {
|
205 | 72902672 | Christophe Lyon | SET_QC(); |
206 | 72902672 | Christophe Lyon | res = ~0;
|
207 | 72902672 | Christophe Lyon | } |
208 | 72902672 | Christophe Lyon | return res;
|
209 | 72902672 | Christophe Lyon | } |
210 | 72902672 | Christophe Lyon | |
211 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
212 | 72902672 | Christophe Lyon | { |
213 | 72902672 | Christophe Lyon | uint64_t res; |
214 | 72902672 | Christophe Lyon | |
215 | 72902672 | Christophe Lyon | res = src1 + src2; |
216 | 72902672 | Christophe Lyon | if (res < src1) {
|
217 | 72902672 | Christophe Lyon | SET_QC(); |
218 | 72902672 | Christophe Lyon | res = ~(uint64_t)0;
|
219 | 72902672 | Christophe Lyon | } |
220 | 72902672 | Christophe Lyon | return res;
|
221 | 72902672 | Christophe Lyon | } |
222 | 72902672 | Christophe Lyon | |
223 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
224 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
225 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
226 | ad69471c | pbrook | SET_QC(); \ |
227 | ad69471c | pbrook | if (src2 > 0) { \ |
228 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
229 | ad69471c | pbrook | } else { \
|
230 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
231 | ad69471c | pbrook | } \ |
232 | ad69471c | pbrook | } \ |
233 | ad69471c | pbrook | dest = tmp; \ |
234 | ad69471c | pbrook | } while(0) |
235 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
236 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s8, neon_s8, 4)
|
237 | ad69471c | pbrook | #undef NEON_FN
|
238 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
239 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s16, neon_s16, 2)
|
240 | ad69471c | pbrook | #undef NEON_FN
|
241 | ad69471c | pbrook | #undef NEON_SSAT
|
242 | ad69471c | pbrook | |
243 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b) |
244 | 72902672 | Christophe Lyon | { |
245 | 72902672 | Christophe Lyon | uint32_t res = a + b; |
246 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
|
247 | 72902672 | Christophe Lyon | SET_QC(); |
248 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
249 | 72902672 | Christophe Lyon | } |
250 | 72902672 | Christophe Lyon | return res;
|
251 | 72902672 | Christophe Lyon | } |
252 | 72902672 | Christophe Lyon | |
253 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
254 | 72902672 | Christophe Lyon | { |
255 | 72902672 | Christophe Lyon | uint64_t res; |
256 | 72902672 | Christophe Lyon | |
257 | 72902672 | Christophe Lyon | res = src1 + src2; |
258 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
|
259 | 72902672 | Christophe Lyon | SET_QC(); |
260 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
261 | 72902672 | Christophe Lyon | } |
262 | 72902672 | Christophe Lyon | return res;
|
263 | 72902672 | Christophe Lyon | } |
264 | 72902672 | Christophe Lyon | |
265 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
266 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
267 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
268 | ad69471c | pbrook | SET_QC(); \ |
269 | ad69471c | pbrook | dest = 0; \
|
270 | ad69471c | pbrook | } else { \
|
271 | ad69471c | pbrook | dest = tmp; \ |
272 | ad69471c | pbrook | }} while(0) |
273 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
274 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u8, neon_u8, 4)
|
275 | ad69471c | pbrook | #undef NEON_FN
|
276 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
277 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u16, neon_u16, 2)
|
278 | ad69471c | pbrook | #undef NEON_FN
|
279 | ad69471c | pbrook | #undef NEON_USAT
|
280 | ad69471c | pbrook | |
281 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b) |
282 | 72902672 | Christophe Lyon | { |
283 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
284 | 72902672 | Christophe Lyon | if (res > a) {
|
285 | 72902672 | Christophe Lyon | SET_QC(); |
286 | 72902672 | Christophe Lyon | res = 0;
|
287 | 72902672 | Christophe Lyon | } |
288 | 72902672 | Christophe Lyon | return res;
|
289 | 72902672 | Christophe Lyon | } |
290 | 72902672 | Christophe Lyon | |
291 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2) |
292 | 72902672 | Christophe Lyon | { |
293 | 72902672 | Christophe Lyon | uint64_t res; |
294 | 72902672 | Christophe Lyon | |
295 | 72902672 | Christophe Lyon | if (src1 < src2) {
|
296 | 72902672 | Christophe Lyon | SET_QC(); |
297 | 72902672 | Christophe Lyon | res = 0;
|
298 | 72902672 | Christophe Lyon | } else {
|
299 | 72902672 | Christophe Lyon | res = src1 - src2; |
300 | 72902672 | Christophe Lyon | } |
301 | 72902672 | Christophe Lyon | return res;
|
302 | 72902672 | Christophe Lyon | } |
303 | 72902672 | Christophe Lyon | |
304 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
305 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
306 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
307 | ad69471c | pbrook | SET_QC(); \ |
308 | ad69471c | pbrook | if (src2 < 0) { \ |
309 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
310 | ad69471c | pbrook | } else { \
|
311 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
312 | ad69471c | pbrook | } \ |
313 | ad69471c | pbrook | } \ |
314 | ad69471c | pbrook | dest = tmp; \ |
315 | ad69471c | pbrook | } while(0) |
316 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
317 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s8, neon_s8, 4)
|
318 | ad69471c | pbrook | #undef NEON_FN
|
319 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
320 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s16, neon_s16, 2)
|
321 | ad69471c | pbrook | #undef NEON_FN
|
322 | ad69471c | pbrook | #undef NEON_SSAT
|
323 | ad69471c | pbrook | |
324 | 72902672 | Christophe Lyon | uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b) |
325 | 72902672 | Christophe Lyon | { |
326 | 72902672 | Christophe Lyon | uint32_t res = a - b; |
327 | 72902672 | Christophe Lyon | if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
|
328 | 72902672 | Christophe Lyon | SET_QC(); |
329 | 72902672 | Christophe Lyon | res = ~(((int32_t)a >> 31) ^ SIGNBIT);
|
330 | 72902672 | Christophe Lyon | } |
331 | 72902672 | Christophe Lyon | return res;
|
332 | 72902672 | Christophe Lyon | } |
333 | 72902672 | Christophe Lyon | |
334 | 72902672 | Christophe Lyon | uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2) |
335 | 72902672 | Christophe Lyon | { |
336 | 72902672 | Christophe Lyon | uint64_t res; |
337 | 72902672 | Christophe Lyon | |
338 | 72902672 | Christophe Lyon | res = src1 - src2; |
339 | 72902672 | Christophe Lyon | if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
|
340 | 72902672 | Christophe Lyon | SET_QC(); |
341 | 72902672 | Christophe Lyon | res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
|
342 | 72902672 | Christophe Lyon | } |
343 | 72902672 | Christophe Lyon | return res;
|
344 | 72902672 | Christophe Lyon | } |
345 | 72902672 | Christophe Lyon | |
346 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 |
347 | ad69471c | pbrook | NEON_VOP(hadd_s8, neon_s8, 4)
|
348 | ad69471c | pbrook | NEON_VOP(hadd_u8, neon_u8, 4)
|
349 | ad69471c | pbrook | NEON_VOP(hadd_s16, neon_s16, 2)
|
350 | ad69471c | pbrook | NEON_VOP(hadd_u16, neon_u16, 2)
|
351 | ad69471c | pbrook | #undef NEON_FN
|
352 | ad69471c | pbrook | |
353 | ad69471c | pbrook | int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) |
354 | ad69471c | pbrook | { |
355 | ad69471c | pbrook | int32_t dest; |
356 | ad69471c | pbrook | |
357 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
358 | ad69471c | pbrook | if (src1 & src2 & 1) |
359 | ad69471c | pbrook | dest++; |
360 | ad69471c | pbrook | return dest;
|
361 | ad69471c | pbrook | } |
362 | ad69471c | pbrook | |
363 | ad69471c | pbrook | uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) |
364 | ad69471c | pbrook | { |
365 | ad69471c | pbrook | uint32_t dest; |
366 | ad69471c | pbrook | |
367 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
368 | ad69471c | pbrook | if (src1 & src2 & 1) |
369 | ad69471c | pbrook | dest++; |
370 | ad69471c | pbrook | return dest;
|
371 | ad69471c | pbrook | } |
372 | ad69471c | pbrook | |
373 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 |
374 | ad69471c | pbrook | NEON_VOP(rhadd_s8, neon_s8, 4)
|
375 | ad69471c | pbrook | NEON_VOP(rhadd_u8, neon_u8, 4)
|
376 | ad69471c | pbrook | NEON_VOP(rhadd_s16, neon_s16, 2)
|
377 | ad69471c | pbrook | NEON_VOP(rhadd_u16, neon_u16, 2)
|
378 | ad69471c | pbrook | #undef NEON_FN
|
379 | ad69471c | pbrook | |
380 | ad69471c | pbrook | int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) |
381 | ad69471c | pbrook | { |
382 | ad69471c | pbrook | int32_t dest; |
383 | ad69471c | pbrook | |
384 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
385 | ad69471c | pbrook | if ((src1 | src2) & 1) |
386 | ad69471c | pbrook | dest++; |
387 | ad69471c | pbrook | return dest;
|
388 | ad69471c | pbrook | } |
389 | ad69471c | pbrook | |
390 | ad69471c | pbrook | uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) |
391 | ad69471c | pbrook | { |
392 | ad69471c | pbrook | uint32_t dest; |
393 | ad69471c | pbrook | |
394 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
395 | ad69471c | pbrook | if ((src1 | src2) & 1) |
396 | ad69471c | pbrook | dest++; |
397 | ad69471c | pbrook | return dest;
|
398 | ad69471c | pbrook | } |
399 | ad69471c | pbrook | |
400 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 |
401 | ad69471c | pbrook | NEON_VOP(hsub_s8, neon_s8, 4)
|
402 | ad69471c | pbrook | NEON_VOP(hsub_u8, neon_u8, 4)
|
403 | ad69471c | pbrook | NEON_VOP(hsub_s16, neon_s16, 2)
|
404 | ad69471c | pbrook | NEON_VOP(hsub_u16, neon_u16, 2)
|
405 | ad69471c | pbrook | #undef NEON_FN
|
406 | ad69471c | pbrook | |
407 | ad69471c | pbrook | int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) |
408 | ad69471c | pbrook | { |
409 | ad69471c | pbrook | int32_t dest; |
410 | ad69471c | pbrook | |
411 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
412 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
413 | ad69471c | pbrook | dest--; |
414 | ad69471c | pbrook | return dest;
|
415 | ad69471c | pbrook | } |
416 | ad69471c | pbrook | |
417 | ad69471c | pbrook | uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) |
418 | ad69471c | pbrook | { |
419 | ad69471c | pbrook | uint32_t dest; |
420 | ad69471c | pbrook | |
421 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
422 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
423 | ad69471c | pbrook | dest--; |
424 | ad69471c | pbrook | return dest;
|
425 | ad69471c | pbrook | } |
426 | ad69471c | pbrook | |
427 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 |
428 | ad69471c | pbrook | NEON_VOP(cgt_s8, neon_s8, 4)
|
429 | ad69471c | pbrook | NEON_VOP(cgt_u8, neon_u8, 4)
|
430 | ad69471c | pbrook | NEON_VOP(cgt_s16, neon_s16, 2)
|
431 | ad69471c | pbrook | NEON_VOP(cgt_u16, neon_u16, 2)
|
432 | ad69471c | pbrook | NEON_VOP(cgt_s32, neon_s32, 1)
|
433 | ad69471c | pbrook | NEON_VOP(cgt_u32, neon_u32, 1)
|
434 | ad69471c | pbrook | #undef NEON_FN
|
435 | ad69471c | pbrook | |
436 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 |
437 | ad69471c | pbrook | NEON_VOP(cge_s8, neon_s8, 4)
|
438 | ad69471c | pbrook | NEON_VOP(cge_u8, neon_u8, 4)
|
439 | ad69471c | pbrook | NEON_VOP(cge_s16, neon_s16, 2)
|
440 | ad69471c | pbrook | NEON_VOP(cge_u16, neon_u16, 2)
|
441 | ad69471c | pbrook | NEON_VOP(cge_s32, neon_s32, 1)
|
442 | ad69471c | pbrook | NEON_VOP(cge_u32, neon_u32, 1)
|
443 | ad69471c | pbrook | #undef NEON_FN
|
444 | ad69471c | pbrook | |
445 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
|
446 | ad69471c | pbrook | NEON_VOP(min_s8, neon_s8, 4)
|
447 | ad69471c | pbrook | NEON_VOP(min_u8, neon_u8, 4)
|
448 | ad69471c | pbrook | NEON_VOP(min_s16, neon_s16, 2)
|
449 | ad69471c | pbrook | NEON_VOP(min_u16, neon_u16, 2)
|
450 | ad69471c | pbrook | NEON_VOP(min_s32, neon_s32, 1)
|
451 | ad69471c | pbrook | NEON_VOP(min_u32, neon_u32, 1)
|
452 | ad69471c | pbrook | NEON_POP(pmin_s8, neon_s8, 4)
|
453 | ad69471c | pbrook | NEON_POP(pmin_u8, neon_u8, 4)
|
454 | ad69471c | pbrook | NEON_POP(pmin_s16, neon_s16, 2)
|
455 | ad69471c | pbrook | NEON_POP(pmin_u16, neon_u16, 2)
|
456 | ad69471c | pbrook | #undef NEON_FN
|
457 | ad69471c | pbrook | |
458 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
|
459 | ad69471c | pbrook | NEON_VOP(max_s8, neon_s8, 4)
|
460 | ad69471c | pbrook | NEON_VOP(max_u8, neon_u8, 4)
|
461 | ad69471c | pbrook | NEON_VOP(max_s16, neon_s16, 2)
|
462 | ad69471c | pbrook | NEON_VOP(max_u16, neon_u16, 2)
|
463 | ad69471c | pbrook | NEON_VOP(max_s32, neon_s32, 1)
|
464 | ad69471c | pbrook | NEON_VOP(max_u32, neon_u32, 1)
|
465 | ad69471c | pbrook | NEON_POP(pmax_s8, neon_s8, 4)
|
466 | ad69471c | pbrook | NEON_POP(pmax_u8, neon_u8, 4)
|
467 | ad69471c | pbrook | NEON_POP(pmax_s16, neon_s16, 2)
|
468 | ad69471c | pbrook | NEON_POP(pmax_u16, neon_u16, 2)
|
469 | ad69471c | pbrook | #undef NEON_FN
|
470 | ad69471c | pbrook | |
471 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) \
|
472 | ad69471c | pbrook | dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) |
473 | ad69471c | pbrook | NEON_VOP(abd_s8, neon_s8, 4)
|
474 | ad69471c | pbrook | NEON_VOP(abd_u8, neon_u8, 4)
|
475 | ad69471c | pbrook | NEON_VOP(abd_s16, neon_s16, 2)
|
476 | ad69471c | pbrook | NEON_VOP(abd_u16, neon_u16, 2)
|
477 | ad69471c | pbrook | NEON_VOP(abd_s32, neon_s32, 1)
|
478 | ad69471c | pbrook | NEON_VOP(abd_u32, neon_u32, 1)
|
479 | ad69471c | pbrook | #undef NEON_FN
|
480 | ad69471c | pbrook | |
481 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
482 | ad69471c | pbrook | int8_t tmp; \ |
483 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
484 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
485 | 50f67e95 | Juha Riihimรคki | tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
486 | ad69471c | pbrook | dest = 0; \
|
487 | ad69471c | pbrook | } else if (tmp < 0) { \ |
488 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
489 | ad69471c | pbrook | } else { \
|
490 | ad69471c | pbrook | dest = src1 << tmp; \ |
491 | ad69471c | pbrook | }} while (0) |
492 | ad69471c | pbrook | NEON_VOP(shl_u8, neon_u8, 4)
|
493 | ad69471c | pbrook | NEON_VOP(shl_u16, neon_u16, 2)
|
494 | ad69471c | pbrook | NEON_VOP(shl_u32, neon_u32, 1)
|
495 | ad69471c | pbrook | #undef NEON_FN
|
496 | ad69471c | pbrook | |
497 | ad69471c | pbrook | uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) |
498 | ad69471c | pbrook | { |
499 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
500 | ad69471c | pbrook | if (shift >= 64 || shift <= -64) { |
501 | ad69471c | pbrook | val = 0;
|
502 | ad69471c | pbrook | } else if (shift < 0) { |
503 | ad69471c | pbrook | val >>= -shift; |
504 | ad69471c | pbrook | } else {
|
505 | ad69471c | pbrook | val <<= shift; |
506 | ad69471c | pbrook | } |
507 | ad69471c | pbrook | return val;
|
508 | ad69471c | pbrook | } |
509 | ad69471c | pbrook | |
510 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
511 | ad69471c | pbrook | int8_t tmp; \ |
512 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
513 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
514 | ad69471c | pbrook | dest = 0; \
|
515 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
516 | ad69471c | pbrook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
517 | ad69471c | pbrook | } else if (tmp < 0) { \ |
518 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
519 | ad69471c | pbrook | } else { \
|
520 | ad69471c | pbrook | dest = src1 << tmp; \ |
521 | ad69471c | pbrook | }} while (0) |
522 | ad69471c | pbrook | NEON_VOP(shl_s8, neon_s8, 4)
|
523 | ad69471c | pbrook | NEON_VOP(shl_s16, neon_s16, 2)
|
524 | ad69471c | pbrook | NEON_VOP(shl_s32, neon_s32, 1)
|
525 | ad69471c | pbrook | #undef NEON_FN
|
526 | ad69471c | pbrook | |
527 | ad69471c | pbrook | uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) |
528 | ad69471c | pbrook | { |
529 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
530 | ad69471c | pbrook | int64_t val = valop; |
531 | ad69471c | pbrook | if (shift >= 64) { |
532 | ad69471c | pbrook | val = 0;
|
533 | ad69471c | pbrook | } else if (shift <= -64) { |
534 | ad69471c | pbrook | val >>= 63;
|
535 | ad69471c | pbrook | } else if (shift < 0) { |
536 | ad69471c | pbrook | val >>= -shift; |
537 | ad69471c | pbrook | } else {
|
538 | ad69471c | pbrook | val <<= shift; |
539 | ad69471c | pbrook | } |
540 | ad69471c | pbrook | return val;
|
541 | ad69471c | pbrook | } |
542 | ad69471c | pbrook | |
543 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
544 | ad69471c | pbrook | int8_t tmp; \ |
545 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
546 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
547 | ad69471c | pbrook | dest = 0; \
|
548 | 50f67e95 | Juha Riihimรคki | } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ |
549 | cb76e138 | Paul Brook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
550 | 50f67e95 | Juha Riihimรคki | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
551 | ad69471c | pbrook | dest = src1 >> (tmp - 1); \
|
552 | ad69471c | pbrook | dest++; \ |
553 | cb76e138 | Paul Brook | dest >>= 1; \
|
554 | ad69471c | pbrook | } else if (tmp < 0) { \ |
555 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
556 | ad69471c | pbrook | } else { \
|
557 | ad69471c | pbrook | dest = src1 << tmp; \ |
558 | ad69471c | pbrook | }} while (0) |
559 | ad69471c | pbrook | NEON_VOP(rshl_s8, neon_s8, 4)
|
560 | ad69471c | pbrook | NEON_VOP(rshl_s16, neon_s16, 2)
|
561 | ad69471c | pbrook | #undef NEON_FN
|
562 | ad69471c | pbrook | |
563 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
564 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
565 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) |
566 | 4bd4ee07 | Christophe Lyon | { |
567 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
568 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
569 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
570 | 4bd4ee07 | Christophe Lyon | if ((shift >= 32) || (shift <= -32)) { |
571 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
572 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
573 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
574 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
575 | 4bd4ee07 | Christophe Lyon | } else {
|
576 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
577 | 4bd4ee07 | Christophe Lyon | } |
578 | 4bd4ee07 | Christophe Lyon | return dest;
|
579 | 4bd4ee07 | Christophe Lyon | } |
580 | 4bd4ee07 | Christophe Lyon | |
581 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
582 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
583 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
584 | ad69471c | pbrook | { |
585 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
586 | ad69471c | pbrook | int64_t val = valop; |
587 | ad69471c | pbrook | if (shift >= 64) { |
588 | ad69471c | pbrook | val = 0;
|
589 | ad69471c | pbrook | } else if (shift < -64) { |
590 | ad69471c | pbrook | val >>= 63;
|
591 | ad69471c | pbrook | } else if (shift == -63) { |
592 | ad69471c | pbrook | val >>= 63;
|
593 | ad69471c | pbrook | val++; |
594 | ad69471c | pbrook | val >>= 1;
|
595 | ad69471c | pbrook | } else if (shift < 0) { |
596 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
597 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
598 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
599 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
600 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
601 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000LL;
|
602 | 4bd4ee07 | Christophe Lyon | } else {
|
603 | 4bd4ee07 | Christophe Lyon | val++; |
604 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
605 | 4bd4ee07 | Christophe Lyon | } |
606 | ad69471c | pbrook | } else {
|
607 | ad69471c | pbrook | val <<= shift; |
608 | ad69471c | pbrook | } |
609 | ad69471c | pbrook | return val;
|
610 | ad69471c | pbrook | } |
611 | ad69471c | pbrook | |
612 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
613 | ad69471c | pbrook | int8_t tmp; \ |
614 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
615 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8 || \ |
616 | 50f67e95 | Juha Riihimรคki | tmp < -(ssize_t)sizeof(src1) * 8) { \ |
617 | ad69471c | pbrook | dest = 0; \
|
618 | 50f67e95 | Juha Riihimรคki | } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ |
619 | ad69471c | pbrook | dest = src1 >> (tmp - 1); \
|
620 | ad69471c | pbrook | } else if (tmp < 0) { \ |
621 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
622 | ad69471c | pbrook | } else { \
|
623 | ad69471c | pbrook | dest = src1 << tmp; \ |
624 | ad69471c | pbrook | }} while (0) |
625 | ad69471c | pbrook | NEON_VOP(rshl_u8, neon_u8, 4)
|
626 | ad69471c | pbrook | NEON_VOP(rshl_u16, neon_u16, 2)
|
627 | ad69471c | pbrook | #undef NEON_FN
|
628 | ad69471c | pbrook | |
629 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
630 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
631 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) |
632 | 4bd4ee07 | Christophe Lyon | { |
633 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
634 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
635 | 4bd4ee07 | Christophe Lyon | if (shift >= 32 || shift < -32) { |
636 | 4bd4ee07 | Christophe Lyon | dest = 0;
|
637 | 4bd4ee07 | Christophe Lyon | } else if (shift == -32) { |
638 | 4bd4ee07 | Christophe Lyon | dest = val >> 31;
|
639 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
640 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
641 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
642 | 4bd4ee07 | Christophe Lyon | } else {
|
643 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
644 | 4bd4ee07 | Christophe Lyon | } |
645 | 4bd4ee07 | Christophe Lyon | return dest;
|
646 | 4bd4ee07 | Christophe Lyon | } |
647 | 4bd4ee07 | Christophe Lyon | |
648 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
649 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
650 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
651 | ad69471c | pbrook | { |
652 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
653 | ad69471c | pbrook | if (shift >= 64 || shift < 64) { |
654 | ad69471c | pbrook | val = 0;
|
655 | ad69471c | pbrook | } else if (shift == -64) { |
656 | ad69471c | pbrook | /* Rounding a 1-bit result just preserves that bit. */
|
657 | ad69471c | pbrook | val >>= 63;
|
658 | 4bd4ee07 | Christophe Lyon | } else if (shift < 0) { |
659 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
660 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
661 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
662 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
663 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
664 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
665 | 4bd4ee07 | Christophe Lyon | } else {
|
666 | 4bd4ee07 | Christophe Lyon | val++; |
667 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
668 | 4bd4ee07 | Christophe Lyon | } |
669 | ad69471c | pbrook | } else {
|
670 | ad69471c | pbrook | val <<= shift; |
671 | ad69471c | pbrook | } |
672 | ad69471c | pbrook | return val;
|
673 | ad69471c | pbrook | } |
674 | ad69471c | pbrook | |
675 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
676 | ad69471c | pbrook | int8_t tmp; \ |
677 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
678 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
679 | ad69471c | pbrook | if (src1) { \
|
680 | ad69471c | pbrook | SET_QC(); \ |
681 | ad69471c | pbrook | dest = ~0; \
|
682 | ad69471c | pbrook | } else { \
|
683 | ad69471c | pbrook | dest = 0; \
|
684 | ad69471c | pbrook | } \ |
685 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
686 | ad69471c | pbrook | dest = 0; \
|
687 | ad69471c | pbrook | } else if (tmp < 0) { \ |
688 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
689 | ad69471c | pbrook | } else { \
|
690 | ad69471c | pbrook | dest = src1 << tmp; \ |
691 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
692 | ad69471c | pbrook | SET_QC(); \ |
693 | ad69471c | pbrook | dest = ~0; \
|
694 | ad69471c | pbrook | } \ |
695 | ad69471c | pbrook | }} while (0) |
696 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u8, neon_u8, 4)
|
697 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u16, neon_u16, 2)
|
698 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u32, neon_u32, 1)
|
699 | ad69471c | pbrook | #undef NEON_FN
|
700 | ad69471c | pbrook | |
701 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
702 | ad69471c | pbrook | { |
703 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
704 | ad69471c | pbrook | if (shift >= 64) { |
705 | ad69471c | pbrook | if (val) {
|
706 | ad69471c | pbrook | val = ~(uint64_t)0;
|
707 | ad69471c | pbrook | SET_QC(); |
708 | ad69471c | pbrook | } |
709 | ad69471c | pbrook | } else if (shift <= -64) { |
710 | ad69471c | pbrook | val = 0;
|
711 | ad69471c | pbrook | } else if (shift < 0) { |
712 | ad69471c | pbrook | val >>= -shift; |
713 | ad69471c | pbrook | } else {
|
714 | ad69471c | pbrook | uint64_t tmp = val; |
715 | ad69471c | pbrook | val <<= shift; |
716 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
717 | ad69471c | pbrook | SET_QC(); |
718 | ad69471c | pbrook | val = ~(uint64_t)0;
|
719 | ad69471c | pbrook | } |
720 | ad69471c | pbrook | } |
721 | ad69471c | pbrook | return val;
|
722 | ad69471c | pbrook | } |
723 | ad69471c | pbrook | |
724 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
725 | ad69471c | pbrook | int8_t tmp; \ |
726 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
727 | 50f67e95 | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
728 | a5d88f3e | Peter Maydell | if (src1) { \
|
729 | ad69471c | pbrook | SET_QC(); \ |
730 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
731 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
732 | a5d88f3e | Peter Maydell | dest--; \ |
733 | a5d88f3e | Peter Maydell | } \ |
734 | a5d88f3e | Peter Maydell | } else { \
|
735 | a5d88f3e | Peter Maydell | dest = src1; \ |
736 | a5d88f3e | Peter Maydell | } \ |
737 | 50f67e95 | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
738 | ad69471c | pbrook | dest = src1 >> 31; \
|
739 | ad69471c | pbrook | } else if (tmp < 0) { \ |
740 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
741 | ad69471c | pbrook | } else { \
|
742 | ad69471c | pbrook | dest = src1 << tmp; \ |
743 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
744 | ad69471c | pbrook | SET_QC(); \ |
745 | a5d88f3e | Peter Maydell | dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ |
746 | a5d88f3e | Peter Maydell | if (src1 > 0) { \ |
747 | a5d88f3e | Peter Maydell | dest--; \ |
748 | a5d88f3e | Peter Maydell | } \ |
749 | ad69471c | pbrook | } \ |
750 | ad69471c | pbrook | }} while (0) |
751 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s8, neon_s8, 4)
|
752 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s16, neon_s16, 2)
|
753 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s32, neon_s32, 1)
|
754 | ad69471c | pbrook | #undef NEON_FN
|
755 | ad69471c | pbrook | |
756 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
757 | ad69471c | pbrook | { |
758 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
759 | ad69471c | pbrook | int64_t val = valop; |
760 | ad69471c | pbrook | if (shift >= 64) { |
761 | ad69471c | pbrook | if (val) {
|
762 | ad69471c | pbrook | SET_QC(); |
763 | eb7a3d79 | Peter Maydell | val = (val >> 63) ^ ~SIGNBIT64;
|
764 | ad69471c | pbrook | } |
765 | 4c9b70ae | Juha Riihimรคki | } else if (shift <= -64) { |
766 | ad69471c | pbrook | val >>= 63;
|
767 | ad69471c | pbrook | } else if (shift < 0) { |
768 | ad69471c | pbrook | val >>= -shift; |
769 | ad69471c | pbrook | } else {
|
770 | ad69471c | pbrook | int64_t tmp = val; |
771 | ad69471c | pbrook | val <<= shift; |
772 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
773 | ad69471c | pbrook | SET_QC(); |
774 | ad69471c | pbrook | val = (tmp >> 63) ^ ~SIGNBIT64;
|
775 | ad69471c | pbrook | } |
776 | ad69471c | pbrook | } |
777 | ad69471c | pbrook | return val;
|
778 | ad69471c | pbrook | } |
779 | ad69471c | pbrook | |
780 | 4ca4502c | Juha Riihimรคki | #define NEON_FN(dest, src1, src2) do { \ |
781 | 4ca4502c | Juha Riihimรคki | if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ |
782 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
783 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
784 | 4ca4502c | Juha Riihimรคki | } else { \
|
785 | 4ca4502c | Juha Riihimรคki | int8_t tmp; \ |
786 | 4ca4502c | Juha Riihimรคki | tmp = (int8_t)src2; \ |
787 | 4ca4502c | Juha Riihimรคki | if (tmp >= (ssize_t)sizeof(src1) * 8) { \ |
788 | 4ca4502c | Juha Riihimรคki | if (src1) { \
|
789 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
790 | 4ca4502c | Juha Riihimรคki | dest = ~0; \
|
791 | 4ca4502c | Juha Riihimรคki | } else { \
|
792 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
793 | 4ca4502c | Juha Riihimรคki | } \ |
794 | 4ca4502c | Juha Riihimรคki | } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ |
795 | 4ca4502c | Juha Riihimรคki | dest = 0; \
|
796 | 4ca4502c | Juha Riihimรคki | } else if (tmp < 0) { \ |
797 | 4ca4502c | Juha Riihimรคki | dest = src1 >> -tmp; \ |
798 | 4ca4502c | Juha Riihimรคki | } else { \
|
799 | 4ca4502c | Juha Riihimรคki | dest = src1 << tmp; \ |
800 | 4ca4502c | Juha Riihimรคki | if ((dest >> tmp) != src1) { \
|
801 | 4ca4502c | Juha Riihimรคki | SET_QC(); \ |
802 | 4ca4502c | Juha Riihimรคki | dest = ~0; \
|
803 | 4ca4502c | Juha Riihimรคki | } \ |
804 | 4ca4502c | Juha Riihimรคki | } \ |
805 | 4ca4502c | Juha Riihimรคki | }} while (0) |
806 | 4ca4502c | Juha Riihimรคki | NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
|
807 | 4ca4502c | Juha Riihimรคki | NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
|
808 | 4ca4502c | Juha Riihimรคki | #undef NEON_FN
|
809 | 4ca4502c | Juha Riihimรคki | |
810 | 4ca4502c | Juha Riihimรคki | uint32_t HELPER(neon_qshlu_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
811 | 4ca4502c | Juha Riihimรคki | { |
812 | 4ca4502c | Juha Riihimรคki | if ((int32_t)valop < 0) { |
813 | 4ca4502c | Juha Riihimรคki | SET_QC(); |
814 | 4ca4502c | Juha Riihimรคki | return 0; |
815 | 4ca4502c | Juha Riihimรคki | } |
816 | 4ca4502c | Juha Riihimรคki | return helper_neon_qshl_u32(env, valop, shiftop);
|
817 | 4ca4502c | Juha Riihimรคki | } |
818 | 4ca4502c | Juha Riihimรคki | |
819 | 4ca4502c | Juha Riihimรคki | uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
820 | 4ca4502c | Juha Riihimรคki | { |
821 | 4ca4502c | Juha Riihimรคki | if ((int64_t)valop < 0) { |
822 | 4ca4502c | Juha Riihimรคki | SET_QC(); |
823 | 4ca4502c | Juha Riihimรคki | return 0; |
824 | 4ca4502c | Juha Riihimรคki | } |
825 | 4ca4502c | Juha Riihimรคki | return helper_neon_qshl_u64(env, valop, shiftop);
|
826 | 4ca4502c | Juha Riihimรคki | } |
827 | ad69471c | pbrook | |
828 | ad69471c | pbrook | /* FIXME: This is wrong. */
|
829 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
830 | ad69471c | pbrook | int8_t tmp; \ |
831 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
832 | ad69471c | pbrook | if (tmp < 0) { \ |
833 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
834 | ad69471c | pbrook | } else { \
|
835 | ad69471c | pbrook | dest = src1 << tmp; \ |
836 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
837 | ad69471c | pbrook | SET_QC(); \ |
838 | ad69471c | pbrook | dest = ~0; \
|
839 | ad69471c | pbrook | } \ |
840 | ad69471c | pbrook | }} while (0) |
841 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
|
842 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
|
843 | ad69471c | pbrook | #undef NEON_FN
|
844 | ad69471c | pbrook | |
845 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
846 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
847 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop) |
848 | 4bd4ee07 | Christophe Lyon | { |
849 | 4bd4ee07 | Christophe Lyon | uint32_t dest; |
850 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
851 | 4bd4ee07 | Christophe Lyon | if (shift < 0) { |
852 | 4bd4ee07 | Christophe Lyon | uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); |
853 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
854 | 4bd4ee07 | Christophe Lyon | } else {
|
855 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
856 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
857 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
858 | 4bd4ee07 | Christophe Lyon | dest = ~0;
|
859 | 4bd4ee07 | Christophe Lyon | } |
860 | 4bd4ee07 | Christophe Lyon | } |
861 | 4bd4ee07 | Christophe Lyon | return dest;
|
862 | 4bd4ee07 | Christophe Lyon | } |
863 | 4bd4ee07 | Christophe Lyon | |
864 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
865 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
866 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
867 | ad69471c | pbrook | { |
868 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
869 | ad69471c | pbrook | if (shift < 0) { |
870 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
871 | 4bd4ee07 | Christophe Lyon | if (val == UINT64_MAX) {
|
872 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
873 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
874 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
875 | 4bd4ee07 | Christophe Lyon | val = 0x8000000000000000ULL;
|
876 | 4bd4ee07 | Christophe Lyon | } else {
|
877 | 4bd4ee07 | Christophe Lyon | val++; |
878 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
879 | 4bd4ee07 | Christophe Lyon | } |
880 | ad69471c | pbrook | } else { \
|
881 | ad69471c | pbrook | uint64_t tmp = val; |
882 | ad69471c | pbrook | val <<= shift; |
883 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
884 | ad69471c | pbrook | SET_QC(); |
885 | ad69471c | pbrook | val = ~0;
|
886 | ad69471c | pbrook | } |
887 | ad69471c | pbrook | } |
888 | ad69471c | pbrook | return val;
|
889 | ad69471c | pbrook | } |
890 | ad69471c | pbrook | |
891 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
892 | ad69471c | pbrook | int8_t tmp; \ |
893 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
894 | ad69471c | pbrook | if (tmp < 0) { \ |
895 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
896 | ad69471c | pbrook | } else { \
|
897 | ad69471c | pbrook | dest = src1 << tmp; \ |
898 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
899 | ad69471c | pbrook | SET_QC(); \ |
900 | ad69471c | pbrook | dest = src1 >> 31; \
|
901 | ad69471c | pbrook | } \ |
902 | ad69471c | pbrook | }} while (0) |
903 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
|
904 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
|
905 | ad69471c | pbrook | #undef NEON_FN
|
906 | ad69471c | pbrook | |
907 | 4bd4ee07 | Christophe Lyon | /* The addition of the rounding constant may overflow, so we use an
|
908 | 4bd4ee07 | Christophe Lyon | * intermediate 64 bits accumulator. */
|
909 | 4bd4ee07 | Christophe Lyon | uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) |
910 | 4bd4ee07 | Christophe Lyon | { |
911 | 4bd4ee07 | Christophe Lyon | int32_t dest; |
912 | 4bd4ee07 | Christophe Lyon | int32_t val = (int32_t)valop; |
913 | 4bd4ee07 | Christophe Lyon | int8_t shift = (int8_t)shiftop; |
914 | 4bd4ee07 | Christophe Lyon | if (shift < 0) { |
915 | 4bd4ee07 | Christophe Lyon | int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); |
916 | 4bd4ee07 | Christophe Lyon | dest = big_dest >> -shift; |
917 | 4bd4ee07 | Christophe Lyon | } else {
|
918 | 4bd4ee07 | Christophe Lyon | dest = val << shift; |
919 | 4bd4ee07 | Christophe Lyon | if ((dest >> shift) != val) {
|
920 | 4bd4ee07 | Christophe Lyon | SET_QC(); |
921 | 4bd4ee07 | Christophe Lyon | dest = (val >> 31) ^ ~SIGNBIT;
|
922 | 4bd4ee07 | Christophe Lyon | } |
923 | 4bd4ee07 | Christophe Lyon | } |
924 | 4bd4ee07 | Christophe Lyon | return dest;
|
925 | 4bd4ee07 | Christophe Lyon | } |
926 | 4bd4ee07 | Christophe Lyon | |
927 | 4bd4ee07 | Christophe Lyon | /* Handling addition overflow with 64 bits inputs values is more
|
928 | 4bd4ee07 | Christophe Lyon | * tricky than with 32 bits values. */
|
929 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
930 | ad69471c | pbrook | { |
931 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
932 | ad69471c | pbrook | int64_t val = valop; |
933 | ad69471c | pbrook | |
934 | ad69471c | pbrook | if (shift < 0) { |
935 | 4bd4ee07 | Christophe Lyon | val >>= (-shift - 1);
|
936 | 4bd4ee07 | Christophe Lyon | if (val == INT64_MAX) {
|
937 | 4bd4ee07 | Christophe Lyon | /* In this case, it means that the rounding constant is 1,
|
938 | 4bd4ee07 | Christophe Lyon | * and the addition would overflow. Return the actual
|
939 | 4bd4ee07 | Christophe Lyon | * result directly. */
|
940 | 4bd4ee07 | Christophe Lyon | val = 0x4000000000000000ULL;
|
941 | 4bd4ee07 | Christophe Lyon | } else {
|
942 | 4bd4ee07 | Christophe Lyon | val++; |
943 | 4bd4ee07 | Christophe Lyon | val >>= 1;
|
944 | 4bd4ee07 | Christophe Lyon | } |
945 | ad69471c | pbrook | } else {
|
946 | 4bd4ee07 | Christophe Lyon | int64_t tmp = val; |
947 | ad69471c | pbrook | val <<= shift; |
948 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
949 | ad69471c | pbrook | SET_QC(); |
950 | 4bd4ee07 | Christophe Lyon | val = (tmp >> 63) ^ ~SIGNBIT64;
|
951 | ad69471c | pbrook | } |
952 | ad69471c | pbrook | } |
953 | ad69471c | pbrook | return val;
|
954 | ad69471c | pbrook | } |
955 | ad69471c | pbrook | |
956 | ad69471c | pbrook | uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |
957 | ad69471c | pbrook | { |
958 | ad69471c | pbrook | uint32_t mask; |
959 | ad69471c | pbrook | mask = (a ^ b) & 0x80808080u;
|
960 | ad69471c | pbrook | a &= ~0x80808080u;
|
961 | ad69471c | pbrook | b &= ~0x80808080u;
|
962 | ad69471c | pbrook | return (a + b) ^ mask;
|
963 | ad69471c | pbrook | } |
964 | ad69471c | pbrook | |
965 | ad69471c | pbrook | uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) |
966 | ad69471c | pbrook | { |
967 | ad69471c | pbrook | uint32_t mask; |
968 | ad69471c | pbrook | mask = (a ^ b) & 0x80008000u;
|
969 | ad69471c | pbrook | a &= ~0x80008000u;
|
970 | ad69471c | pbrook | b &= ~0x80008000u;
|
971 | ad69471c | pbrook | return (a + b) ^ mask;
|
972 | ad69471c | pbrook | } |
973 | ad69471c | pbrook | |
974 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 + src2
|
975 | ad69471c | pbrook | NEON_POP(padd_u8, neon_u8, 4)
|
976 | ad69471c | pbrook | NEON_POP(padd_u16, neon_u16, 2)
|
977 | ad69471c | pbrook | #undef NEON_FN
|
978 | ad69471c | pbrook | |
979 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 - src2
|
980 | ad69471c | pbrook | NEON_VOP(sub_u8, neon_u8, 4)
|
981 | ad69471c | pbrook | NEON_VOP(sub_u16, neon_u16, 2)
|
982 | ad69471c | pbrook | #undef NEON_FN
|
983 | ad69471c | pbrook | |
984 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 * src2
|
985 | ad69471c | pbrook | NEON_VOP(mul_u8, neon_u8, 4)
|
986 | ad69471c | pbrook | NEON_VOP(mul_u16, neon_u16, 2)
|
987 | ad69471c | pbrook | #undef NEON_FN
|
988 | ad69471c | pbrook | |
989 | 1654b2d6 | aurel32 | /* Polynomial multiplication is like integer multiplication except the
|
990 | ad69471c | pbrook | partial products are XORed, not added. */
|
991 | ad69471c | pbrook | uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) |
992 | ad69471c | pbrook | { |
993 | ad69471c | pbrook | uint32_t mask; |
994 | ad69471c | pbrook | uint32_t result; |
995 | ad69471c | pbrook | result = 0;
|
996 | ad69471c | pbrook | while (op1) {
|
997 | ad69471c | pbrook | mask = 0;
|
998 | ad69471c | pbrook | if (op1 & 1) |
999 | ad69471c | pbrook | mask |= 0xff;
|
1000 | ad69471c | pbrook | if (op1 & (1 << 8)) |
1001 | ad69471c | pbrook | mask |= (0xff << 8); |
1002 | ad69471c | pbrook | if (op1 & (1 << 16)) |
1003 | ad69471c | pbrook | mask |= (0xff << 16); |
1004 | ad69471c | pbrook | if (op1 & (1 << 24)) |
1005 | ad69471c | pbrook | mask |= (0xff << 24); |
1006 | ad69471c | pbrook | result ^= op2 & mask; |
1007 | ad69471c | pbrook | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1008 | ad69471c | pbrook | op2 = (op2 << 1) & 0xfefefefe; |
1009 | ad69471c | pbrook | } |
1010 | ad69471c | pbrook | return result;
|
1011 | ad69471c | pbrook | } |
1012 | ad69471c | pbrook | |
1013 | e5ca24cb | Peter Maydell | uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) |
1014 | e5ca24cb | Peter Maydell | { |
1015 | e5ca24cb | Peter Maydell | uint64_t result = 0;
|
1016 | e5ca24cb | Peter Maydell | uint64_t mask; |
1017 | e5ca24cb | Peter Maydell | uint64_t op2ex = op2; |
1018 | e5ca24cb | Peter Maydell | op2ex = (op2ex & 0xff) |
|
1019 | e5ca24cb | Peter Maydell | ((op2ex & 0xff00) << 8) | |
1020 | e5ca24cb | Peter Maydell | ((op2ex & 0xff0000) << 16) | |
1021 | e5ca24cb | Peter Maydell | ((op2ex & 0xff000000) << 24); |
1022 | e5ca24cb | Peter Maydell | while (op1) {
|
1023 | e5ca24cb | Peter Maydell | mask = 0;
|
1024 | e5ca24cb | Peter Maydell | if (op1 & 1) { |
1025 | e5ca24cb | Peter Maydell | mask |= 0xffff;
|
1026 | e5ca24cb | Peter Maydell | } |
1027 | e5ca24cb | Peter Maydell | if (op1 & (1 << 8)) { |
1028 | e5ca24cb | Peter Maydell | mask |= (0xffffU << 16); |
1029 | e5ca24cb | Peter Maydell | } |
1030 | e5ca24cb | Peter Maydell | if (op1 & (1 << 16)) { |
1031 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 32); |
1032 | e5ca24cb | Peter Maydell | } |
1033 | e5ca24cb | Peter Maydell | if (op1 & (1 << 24)) { |
1034 | e5ca24cb | Peter Maydell | mask |= (0xffffULL << 48); |
1035 | e5ca24cb | Peter Maydell | } |
1036 | e5ca24cb | Peter Maydell | result ^= op2ex & mask; |
1037 | e5ca24cb | Peter Maydell | op1 = (op1 >> 1) & 0x7f7f7f7f; |
1038 | e5ca24cb | Peter Maydell | op2ex <<= 1;
|
1039 | e5ca24cb | Peter Maydell | } |
1040 | e5ca24cb | Peter Maydell | return result;
|
1041 | e5ca24cb | Peter Maydell | } |
1042 | e5ca24cb | Peter Maydell | |
1043 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 |
1044 | ad69471c | pbrook | NEON_VOP(tst_u8, neon_u8, 4)
|
1045 | ad69471c | pbrook | NEON_VOP(tst_u16, neon_u16, 2)
|
1046 | ad69471c | pbrook | NEON_VOP(tst_u32, neon_u32, 1)
|
1047 | ad69471c | pbrook | #undef NEON_FN
|
1048 | ad69471c | pbrook | |
1049 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 |
1050 | ad69471c | pbrook | NEON_VOP(ceq_u8, neon_u8, 4)
|
1051 | ad69471c | pbrook | NEON_VOP(ceq_u16, neon_u16, 2)
|
1052 | ad69471c | pbrook | NEON_VOP(ceq_u32, neon_u32, 1)
|
1053 | ad69471c | pbrook | #undef NEON_FN
|
1054 | ad69471c | pbrook | |
1055 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src |
1056 | ad69471c | pbrook | NEON_VOP1(abs_s8, neon_s8, 4)
|
1057 | ad69471c | pbrook | NEON_VOP1(abs_s16, neon_s16, 2)
|
1058 | ad69471c | pbrook | #undef NEON_FN
|
1059 | ad69471c | pbrook | |
/* Count Leading Sign/Zero Bits.  */

/* Number of leading zero bits in an 8-bit value (8 when x is 0). */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    /* Each remaining significant bit removes one leading zero. */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1068 | ad69471c | pbrook | |
/* Number of leading zero bits in a 16-bit value (16 when x is 0). */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    /* Each remaining significant bit removes one leading zero. */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1076 | ad69471c | pbrook | |
1077 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
|
1078 | ad69471c | pbrook | NEON_VOP1(clz_u8, neon_u8, 4)
|
1079 | ad69471c | pbrook | #undef NEON_FN
|
1080 | ad69471c | pbrook | |
1081 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
|
1082 | ad69471c | pbrook | NEON_VOP1(clz_u16, neon_u16, 2)
|
1083 | ad69471c | pbrook | #undef NEON_FN
|
1084 | ad69471c | pbrook | |
1085 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 |
1086 | ad69471c | pbrook | NEON_VOP1(cls_s8, neon_s8, 4)
|
1087 | ad69471c | pbrook | #undef NEON_FN
|
1088 | ad69471c | pbrook | |
1089 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 |
1090 | ad69471c | pbrook | NEON_VOP1(cls_s16, neon_s16, 2)
|
1091 | ad69471c | pbrook | #undef NEON_FN
|
1092 | ad69471c | pbrook | |
1093 | ad69471c | pbrook | uint32_t HELPER(neon_cls_s32)(uint32_t x) |
1094 | ad69471c | pbrook | { |
1095 | ad69471c | pbrook | int count;
|
1096 | ad69471c | pbrook | if ((int32_t)x < 0) |
1097 | ad69471c | pbrook | x = ~x; |
1098 | ad69471c | pbrook | for (count = 32; x; count--) |
1099 | ad69471c | pbrook | x = x >> 1;
|
1100 | ad69471c | pbrook | return count - 1; |
1101 | ad69471c | pbrook | } |
1102 | ad69471c | pbrook | |
1103 | ad69471c | pbrook | /* Bit count. */
|
1104 | ad69471c | pbrook | uint32_t HELPER(neon_cnt_u8)(uint32_t x) |
1105 | ad69471c | pbrook | { |
1106 | ad69471c | pbrook | x = (x & 0x55555555) + ((x >> 1) & 0x55555555); |
1107 | ad69471c | pbrook | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
1108 | ad69471c | pbrook | x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); |
1109 | ad69471c | pbrook | return x;
|
1110 | ad69471c | pbrook | } |
1111 | ad69471c | pbrook | |
1112 | ad69471c | pbrook | #define NEON_QDMULH16(dest, src1, src2, round) do { \ |
1113 | ad69471c | pbrook | uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ |
1114 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ |
1115 | ad69471c | pbrook | SET_QC(); \ |
1116 | ad69471c | pbrook | tmp = (tmp >> 31) ^ ~SIGNBIT; \
|
1117 | 46eece9d | Juha Riihimรคki | } else { \
|
1118 | 46eece9d | Juha Riihimรคki | tmp <<= 1; \
|
1119 | ad69471c | pbrook | } \ |
1120 | ad69471c | pbrook | if (round) { \
|
1121 | ad69471c | pbrook | int32_t old = tmp; \ |
1122 | ad69471c | pbrook | tmp += 1 << 15; \ |
1123 | ad69471c | pbrook | if ((int32_t)tmp < old) { \
|
1124 | ad69471c | pbrook | SET_QC(); \ |
1125 | ad69471c | pbrook | tmp = SIGNBIT - 1; \
|
1126 | ad69471c | pbrook | } \ |
1127 | ad69471c | pbrook | } \ |
1128 | ad69471c | pbrook | dest = tmp >> 16; \
|
1129 | ad69471c | pbrook | } while(0) |
1130 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) |
1131 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
|
1132 | ad69471c | pbrook | #undef NEON_FN
|
1133 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) |
1134 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
|
1135 | ad69471c | pbrook | #undef NEON_FN
|
1136 | ad69471c | pbrook | #undef NEON_QDMULH16
|
1137 | ad69471c | pbrook | |
1138 | ad69471c | pbrook | #define NEON_QDMULH32(dest, src1, src2, round) do { \ |
1139 | ad69471c | pbrook | uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ |
1140 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ |
1141 | ad69471c | pbrook | SET_QC(); \ |
1142 | ad69471c | pbrook | tmp = (tmp >> 63) ^ ~SIGNBIT64; \
|
1143 | ad69471c | pbrook | } else { \
|
1144 | ad69471c | pbrook | tmp <<= 1; \
|
1145 | ad69471c | pbrook | } \ |
1146 | ad69471c | pbrook | if (round) { \
|
1147 | ad69471c | pbrook | int64_t old = tmp; \ |
1148 | ad69471c | pbrook | tmp += (int64_t)1 << 31; \ |
1149 | ad69471c | pbrook | if ((int64_t)tmp < old) { \
|
1150 | ad69471c | pbrook | SET_QC(); \ |
1151 | ad69471c | pbrook | tmp = SIGNBIT64 - 1; \
|
1152 | ad69471c | pbrook | } \ |
1153 | ad69471c | pbrook | } \ |
1154 | ad69471c | pbrook | dest = tmp >> 32; \
|
1155 | ad69471c | pbrook | } while(0) |
1156 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) |
1157 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
|
1158 | ad69471c | pbrook | #undef NEON_FN
|
1159 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) |
1160 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
|
1161 | ad69471c | pbrook | #undef NEON_FN
|
1162 | ad69471c | pbrook | #undef NEON_QDMULH32
|
1163 | ad69471c | pbrook | |
1164 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u8)(uint64_t x) |
1165 | ad69471c | pbrook | { |
1166 | ad69471c | pbrook | return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) |
1167 | ad69471c | pbrook | | ((x >> 24) & 0xff000000u); |
1168 | ad69471c | pbrook | } |
1169 | ad69471c | pbrook | |
1170 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u16)(uint64_t x) |
1171 | ad69471c | pbrook | { |
1172 | ad69471c | pbrook | return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); |
1173 | ad69471c | pbrook | } |
1174 | ad69471c | pbrook | |
1175 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) |
1176 | ad69471c | pbrook | { |
1177 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1178 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1179 | ad69471c | pbrook | } |
1180 | ad69471c | pbrook | |
1181 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) |
1182 | ad69471c | pbrook | { |
1183 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1184 | ad69471c | pbrook | } |
1185 | ad69471c | pbrook | |
1186 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) |
1187 | ad69471c | pbrook | { |
1188 | ad69471c | pbrook | x &= 0xff80ff80ff80ff80ull;
|
1189 | ad69471c | pbrook | x += 0x0080008000800080ull;
|
1190 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
1191 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
1192 | ad69471c | pbrook | } |
1193 | ad69471c | pbrook | |
1194 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) |
1195 | ad69471c | pbrook | { |
1196 | ad69471c | pbrook | x &= 0xffff8000ffff8000ull;
|
1197 | ad69471c | pbrook | x += 0x0000800000008000ull;
|
1198 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
1199 | ad69471c | pbrook | } |
1200 | ad69471c | pbrook | |
1201 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x) |
1202 | af1bbf30 | Juha Riihimรคki | { |
1203 | af1bbf30 | Juha Riihimรคki | uint16_t s; |
1204 | af1bbf30 | Juha Riihimรคki | uint8_t d; |
1205 | af1bbf30 | Juha Riihimรคki | uint32_t res = 0;
|
1206 | af1bbf30 | Juha Riihimรคki | #define SAT8(n) \
|
1207 | af1bbf30 | Juha Riihimรคki | s = x >> n; \ |
1208 | af1bbf30 | Juha Riihimรคki | if (s & 0x8000) { \ |
1209 | af1bbf30 | Juha Riihimรคki | SET_QC(); \ |
1210 | af1bbf30 | Juha Riihimรคki | } else { \
|
1211 | af1bbf30 | Juha Riihimรคki | if (s > 0xff) { \ |
1212 | af1bbf30 | Juha Riihimรคki | d = 0xff; \
|
1213 | af1bbf30 | Juha Riihimรคki | SET_QC(); \ |
1214 | af1bbf30 | Juha Riihimรคki | } else { \
|
1215 | af1bbf30 | Juha Riihimรคki | d = s; \ |
1216 | af1bbf30 | Juha Riihimรคki | } \ |
1217 | af1bbf30 | Juha Riihimรคki | res |= (uint32_t)d << (n / 2); \
|
1218 | af1bbf30 | Juha Riihimรคki | } |
1219 | af1bbf30 | Juha Riihimรคki | |
1220 | af1bbf30 | Juha Riihimรคki | SAT8(0);
|
1221 | af1bbf30 | Juha Riihimรคki | SAT8(16);
|
1222 | af1bbf30 | Juha Riihimรคki | SAT8(32);
|
1223 | af1bbf30 | Juha Riihimรคki | SAT8(48);
|
1224 | af1bbf30 | Juha Riihimรคki | #undef SAT8
|
1225 | af1bbf30 | Juha Riihimรคki | return res;
|
1226 | af1bbf30 | Juha Riihimรคki | } |
1227 | af1bbf30 | Juha Riihimรคki | |
1228 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) |
1229 | ad69471c | pbrook | { |
1230 | ad69471c | pbrook | uint16_t s; |
1231 | ad69471c | pbrook | uint8_t d; |
1232 | ad69471c | pbrook | uint32_t res = 0;
|
1233 | ad69471c | pbrook | #define SAT8(n) \
|
1234 | ad69471c | pbrook | s = x >> n; \ |
1235 | ad69471c | pbrook | if (s > 0xff) { \ |
1236 | ad69471c | pbrook | d = 0xff; \
|
1237 | ad69471c | pbrook | SET_QC(); \ |
1238 | ad69471c | pbrook | } else { \
|
1239 | ad69471c | pbrook | d = s; \ |
1240 | ad69471c | pbrook | } \ |
1241 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1242 | ad69471c | pbrook | |
1243 | ad69471c | pbrook | SAT8(0);
|
1244 | ad69471c | pbrook | SAT8(16);
|
1245 | ad69471c | pbrook | SAT8(32);
|
1246 | ad69471c | pbrook | SAT8(48);
|
1247 | ad69471c | pbrook | #undef SAT8
|
1248 | ad69471c | pbrook | return res;
|
1249 | ad69471c | pbrook | } |
1250 | ad69471c | pbrook | |
1251 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) |
1252 | ad69471c | pbrook | { |
1253 | ad69471c | pbrook | int16_t s; |
1254 | ad69471c | pbrook | uint8_t d; |
1255 | ad69471c | pbrook | uint32_t res = 0;
|
1256 | ad69471c | pbrook | #define SAT8(n) \
|
1257 | ad69471c | pbrook | s = x >> n; \ |
1258 | ad69471c | pbrook | if (s != (int8_t)s) { \
|
1259 | ad69471c | pbrook | d = (s >> 15) ^ 0x7f; \ |
1260 | ad69471c | pbrook | SET_QC(); \ |
1261 | ad69471c | pbrook | } else { \
|
1262 | ad69471c | pbrook | d = s; \ |
1263 | ad69471c | pbrook | } \ |
1264 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
1265 | ad69471c | pbrook | |
1266 | ad69471c | pbrook | SAT8(0);
|
1267 | ad69471c | pbrook | SAT8(16);
|
1268 | ad69471c | pbrook | SAT8(32);
|
1269 | ad69471c | pbrook | SAT8(48);
|
1270 | ad69471c | pbrook | #undef SAT8
|
1271 | ad69471c | pbrook | return res;
|
1272 | ad69471c | pbrook | } |
1273 | ad69471c | pbrook | |
1274 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x) |
1275 | af1bbf30 | Juha Riihimรคki | { |
1276 | af1bbf30 | Juha Riihimรคki | uint32_t high; |
1277 | af1bbf30 | Juha Riihimรคki | uint32_t low; |
1278 | af1bbf30 | Juha Riihimรคki | low = x; |
1279 | af1bbf30 | Juha Riihimรคki | if (low & 0x80000000) { |
1280 | af1bbf30 | Juha Riihimรคki | low = 0;
|
1281 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1282 | af1bbf30 | Juha Riihimรคki | } else if (low > 0xffff) { |
1283 | af1bbf30 | Juha Riihimรคki | low = 0xffff;
|
1284 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1285 | af1bbf30 | Juha Riihimรคki | } |
1286 | af1bbf30 | Juha Riihimรคki | high = x >> 32;
|
1287 | af1bbf30 | Juha Riihimรคki | if (high & 0x80000000) { |
1288 | af1bbf30 | Juha Riihimรคki | high = 0;
|
1289 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1290 | af1bbf30 | Juha Riihimรคki | } else if (high > 0xffff) { |
1291 | af1bbf30 | Juha Riihimรคki | high = 0xffff;
|
1292 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1293 | af1bbf30 | Juha Riihimรคki | } |
1294 | af1bbf30 | Juha Riihimรคki | return low | (high << 16); |
1295 | af1bbf30 | Juha Riihimรคki | } |
1296 | af1bbf30 | Juha Riihimรคki | |
1297 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) |
1298 | ad69471c | pbrook | { |
1299 | ad69471c | pbrook | uint32_t high; |
1300 | ad69471c | pbrook | uint32_t low; |
1301 | ad69471c | pbrook | low = x; |
1302 | ad69471c | pbrook | if (low > 0xffff) { |
1303 | ad69471c | pbrook | low = 0xffff;
|
1304 | ad69471c | pbrook | SET_QC(); |
1305 | ad69471c | pbrook | } |
1306 | ad69471c | pbrook | high = x >> 32;
|
1307 | ad69471c | pbrook | if (high > 0xffff) { |
1308 | ad69471c | pbrook | high = 0xffff;
|
1309 | ad69471c | pbrook | SET_QC(); |
1310 | ad69471c | pbrook | } |
1311 | ad69471c | pbrook | return low | (high << 16); |
1312 | ad69471c | pbrook | } |
1313 | ad69471c | pbrook | |
1314 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) |
1315 | ad69471c | pbrook | { |
1316 | ad69471c | pbrook | int32_t low; |
1317 | ad69471c | pbrook | int32_t high; |
1318 | ad69471c | pbrook | low = x; |
1319 | ad69471c | pbrook | if (low != (int16_t)low) {
|
1320 | ad69471c | pbrook | low = (low >> 31) ^ 0x7fff; |
1321 | ad69471c | pbrook | SET_QC(); |
1322 | ad69471c | pbrook | } |
1323 | ad69471c | pbrook | high = x >> 32;
|
1324 | ad69471c | pbrook | if (high != (int16_t)high) {
|
1325 | ad69471c | pbrook | high = (high >> 31) ^ 0x7fff; |
1326 | ad69471c | pbrook | SET_QC(); |
1327 | ad69471c | pbrook | } |
1328 | ad69471c | pbrook | return (uint16_t)low | (high << 16); |
1329 | ad69471c | pbrook | } |
1330 | ad69471c | pbrook | |
1331 | af1bbf30 | Juha Riihimรคki | uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x) |
1332 | af1bbf30 | Juha Riihimรคki | { |
1333 | af1bbf30 | Juha Riihimรคki | if (x & 0x8000000000000000ull) { |
1334 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1335 | af1bbf30 | Juha Riihimรคki | return 0; |
1336 | af1bbf30 | Juha Riihimรคki | } |
1337 | af1bbf30 | Juha Riihimรคki | if (x > 0xffffffffu) { |
1338 | af1bbf30 | Juha Riihimรคki | SET_QC(); |
1339 | af1bbf30 | Juha Riihimรคki | return 0xffffffffu; |
1340 | af1bbf30 | Juha Riihimรคki | } |
1341 | af1bbf30 | Juha Riihimรคki | return x;
|
1342 | af1bbf30 | Juha Riihimรคki | } |
1343 | af1bbf30 | Juha Riihimรคki | |
1344 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) |
1345 | ad69471c | pbrook | { |
1346 | ad69471c | pbrook | if (x > 0xffffffffu) { |
1347 | ad69471c | pbrook | SET_QC(); |
1348 | ad69471c | pbrook | return 0xffffffffu; |
1349 | ad69471c | pbrook | } |
1350 | ad69471c | pbrook | return x;
|
1351 | ad69471c | pbrook | } |
1352 | ad69471c | pbrook | |
1353 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) |
1354 | ad69471c | pbrook | { |
1355 | ad69471c | pbrook | if ((int64_t)x != (int32_t)x) {
|
1356 | ad69471c | pbrook | SET_QC(); |
1357 | cc2212c2 | Peter Maydell | return ((int64_t)x >> 63) ^ 0x7fffffff; |
1358 | ad69471c | pbrook | } |
1359 | ad69471c | pbrook | return x;
|
1360 | ad69471c | pbrook | } |
1361 | ad69471c | pbrook | |
1362 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u8)(uint32_t x) |
1363 | ad69471c | pbrook | { |
1364 | ad69471c | pbrook | uint64_t tmp; |
1365 | ad69471c | pbrook | uint64_t ret; |
1366 | ad69471c | pbrook | ret = (uint8_t)x; |
1367 | ad69471c | pbrook | tmp = (uint8_t)(x >> 8);
|
1368 | ad69471c | pbrook | ret |= tmp << 16;
|
1369 | ad69471c | pbrook | tmp = (uint8_t)(x >> 16);
|
1370 | ad69471c | pbrook | ret |= tmp << 32;
|
1371 | ad69471c | pbrook | tmp = (uint8_t)(x >> 24);
|
1372 | ad69471c | pbrook | ret |= tmp << 48;
|
1373 | ad69471c | pbrook | return ret;
|
1374 | ad69471c | pbrook | } |
1375 | ad69471c | pbrook | |
1376 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s8)(uint32_t x) |
1377 | ad69471c | pbrook | { |
1378 | ad69471c | pbrook | uint64_t tmp; |
1379 | ad69471c | pbrook | uint64_t ret; |
1380 | ad69471c | pbrook | ret = (uint16_t)(int8_t)x; |
1381 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 8);
|
1382 | ad69471c | pbrook | ret |= tmp << 16;
|
1383 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 16);
|
1384 | ad69471c | pbrook | ret |= tmp << 32;
|
1385 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 24);
|
1386 | ad69471c | pbrook | ret |= tmp << 48;
|
1387 | ad69471c | pbrook | return ret;
|
1388 | ad69471c | pbrook | } |
1389 | ad69471c | pbrook | |
1390 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u16)(uint32_t x) |
1391 | ad69471c | pbrook | { |
1392 | ad69471c | pbrook | uint64_t high = (uint16_t)(x >> 16);
|
1393 | ad69471c | pbrook | return ((uint16_t)x) | (high << 32); |
1394 | ad69471c | pbrook | } |
1395 | ad69471c | pbrook | |
1396 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s16)(uint32_t x) |
1397 | ad69471c | pbrook | { |
1398 | ad69471c | pbrook | uint64_t high = (int16_t)(x >> 16);
|
1399 | ad69471c | pbrook | return ((uint32_t)(int16_t)x) | (high << 32); |
1400 | ad69471c | pbrook | } |
1401 | ad69471c | pbrook | |
1402 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) |
1403 | ad69471c | pbrook | { |
1404 | ad69471c | pbrook | uint64_t mask; |
1405 | ad69471c | pbrook | mask = (a ^ b) & 0x8000800080008000ull;
|
1406 | ad69471c | pbrook | a &= ~0x8000800080008000ull;
|
1407 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1408 | ad69471c | pbrook | return (a + b) ^ mask;
|
1409 | ad69471c | pbrook | } |
1410 | ad69471c | pbrook | |
1411 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) |
1412 | ad69471c | pbrook | { |
1413 | ad69471c | pbrook | uint64_t mask; |
1414 | ad69471c | pbrook | mask = (a ^ b) & 0x8000000080000000ull;
|
1415 | ad69471c | pbrook | a &= ~0x8000000080000000ull;
|
1416 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1417 | ad69471c | pbrook | return (a + b) ^ mask;
|
1418 | ad69471c | pbrook | } |
1419 | ad69471c | pbrook | |
1420 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) |
1421 | ad69471c | pbrook | { |
1422 | ad69471c | pbrook | uint64_t tmp; |
1423 | ad69471c | pbrook | uint64_t tmp2; |
1424 | ad69471c | pbrook | |
1425 | ad69471c | pbrook | tmp = a & 0x0000ffff0000ffffull;
|
1426 | ad69471c | pbrook | tmp += (a >> 16) & 0x0000ffff0000ffffull; |
1427 | ad69471c | pbrook | tmp2 = b & 0xffff0000ffff0000ull;
|
1428 | ad69471c | pbrook | tmp2 += (b << 16) & 0xffff0000ffff0000ull; |
1429 | ad69471c | pbrook | return ( tmp & 0xffff) |
1430 | ad69471c | pbrook | | ((tmp >> 16) & 0xffff0000ull) |
1431 | ad69471c | pbrook | | ((tmp2 << 16) & 0xffff00000000ull) |
1432 | ad69471c | pbrook | | ( tmp2 & 0xffff000000000000ull);
|
1433 | ad69471c | pbrook | } |
1434 | ad69471c | pbrook | |
1435 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) |
1436 | ad69471c | pbrook | { |
1437 | ad69471c | pbrook | uint32_t low = a + (a >> 32);
|
1438 | ad69471c | pbrook | uint32_t high = b + (b >> 32);
|
1439 | ad69471c | pbrook | return low + ((uint64_t)high << 32); |
1440 | ad69471c | pbrook | } |
1441 | ad69471c | pbrook | |
1442 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) |
1443 | ad69471c | pbrook | { |
1444 | ad69471c | pbrook | uint64_t mask; |
1445 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000800080008000ull;
|
1446 | ad69471c | pbrook | a |= 0x8000800080008000ull;
|
1447 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1448 | ad69471c | pbrook | return (a - b) ^ mask;
|
1449 | ad69471c | pbrook | } |
1450 | ad69471c | pbrook | |
1451 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) |
1452 | ad69471c | pbrook | { |
1453 | ad69471c | pbrook | uint64_t mask; |
1454 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000000080000000ull;
|
1455 | ad69471c | pbrook | a |= 0x8000000080000000ull;
|
1456 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1457 | ad69471c | pbrook | return (a - b) ^ mask;
|
1458 | ad69471c | pbrook | } |
1459 | ad69471c | pbrook | |
1460 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) |
1461 | ad69471c | pbrook | { |
1462 | ad69471c | pbrook | uint32_t x, y; |
1463 | ad69471c | pbrook | uint32_t low, high; |
1464 | ad69471c | pbrook | |
1465 | ad69471c | pbrook | x = a; |
1466 | ad69471c | pbrook | y = b; |
1467 | ad69471c | pbrook | low = x + y; |
1468 | ad69471c | pbrook | if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1469 | ad69471c | pbrook | SET_QC(); |
1470 | ad69471c | pbrook | low = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1471 | ad69471c | pbrook | } |
1472 | ad69471c | pbrook | x = a >> 32;
|
1473 | ad69471c | pbrook | y = b >> 32;
|
1474 | ad69471c | pbrook | high = x + y; |
1475 | ad69471c | pbrook | if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1476 | ad69471c | pbrook | SET_QC(); |
1477 | ad69471c | pbrook | high = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1478 | ad69471c | pbrook | } |
1479 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1480 | ad69471c | pbrook | } |
1481 | ad69471c | pbrook | |
1482 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) |
1483 | ad69471c | pbrook | { |
1484 | ad69471c | pbrook | uint64_t result; |
1485 | ad69471c | pbrook | |
1486 | ad69471c | pbrook | result = a + b; |
1487 | ad69471c | pbrook | if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
|
1488 | ad69471c | pbrook | SET_QC(); |
1489 | ad69471c | pbrook | result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
|
1490 | ad69471c | pbrook | } |
1491 | ad69471c | pbrook | return result;
|
1492 | ad69471c | pbrook | } |
1493 | ad69471c | pbrook | |
/* dest = |x - y|, with both operands first converted to `type`.
 * The subtraction happens in the (promoted) `type`, so a 32-bit signed
 * `type` can overflow for operands of opposite sign. */
#define DO_ABD(dest, x, y, type) do { \
    type abd_lhs = (x); \
    type abd_rhs = (y); \
    if (abd_lhs > abd_rhs) { \
        dest = abd_lhs - abd_rhs; \
    } else { \
        dest = abd_rhs - abd_lhs; \
    } \
    } while(0)
1499 | ad69471c | pbrook | |
1500 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) |
1501 | ad69471c | pbrook | { |
1502 | ad69471c | pbrook | uint64_t tmp; |
1503 | ad69471c | pbrook | uint64_t result; |
1504 | ad69471c | pbrook | DO_ABD(result, a, b, uint8_t); |
1505 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, uint8_t); |
1506 | ad69471c | pbrook | result |= tmp << 16;
|
1507 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint8_t); |
1508 | ad69471c | pbrook | result |= tmp << 32;
|
1509 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, uint8_t); |
1510 | ad69471c | pbrook | result |= tmp << 48;
|
1511 | ad69471c | pbrook | return result;
|
1512 | ad69471c | pbrook | } |
1513 | ad69471c | pbrook | |
1514 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) |
1515 | ad69471c | pbrook | { |
1516 | ad69471c | pbrook | uint64_t tmp; |
1517 | ad69471c | pbrook | uint64_t result; |
1518 | ad69471c | pbrook | DO_ABD(result, a, b, int8_t); |
1519 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, int8_t); |
1520 | ad69471c | pbrook | result |= tmp << 16;
|
1521 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int8_t); |
1522 | ad69471c | pbrook | result |= tmp << 32;
|
1523 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, int8_t); |
1524 | ad69471c | pbrook | result |= tmp << 48;
|
1525 | ad69471c | pbrook | return result;
|
1526 | ad69471c | pbrook | } |
1527 | ad69471c | pbrook | |
1528 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) |
1529 | ad69471c | pbrook | { |
1530 | ad69471c | pbrook | uint64_t tmp; |
1531 | ad69471c | pbrook | uint64_t result; |
1532 | ad69471c | pbrook | DO_ABD(result, a, b, uint16_t); |
1533 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint16_t); |
1534 | ad69471c | pbrook | return result | (tmp << 32); |
1535 | ad69471c | pbrook | } |
1536 | ad69471c | pbrook | |
1537 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) |
1538 | ad69471c | pbrook | { |
1539 | ad69471c | pbrook | uint64_t tmp; |
1540 | ad69471c | pbrook | uint64_t result; |
1541 | ad69471c | pbrook | DO_ABD(result, a, b, int16_t); |
1542 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int16_t); |
1543 | ad69471c | pbrook | return result | (tmp << 32); |
1544 | ad69471c | pbrook | } |
1545 | ad69471c | pbrook | |
1546 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) |
1547 | ad69471c | pbrook | { |
1548 | ad69471c | pbrook | uint64_t result; |
1549 | ad69471c | pbrook | DO_ABD(result, a, b, uint32_t); |
1550 | ad69471c | pbrook | return result;
|
1551 | ad69471c | pbrook | } |
1552 | ad69471c | pbrook | |
/* Absolute difference of two signed 32-bit values, returned as 64 bits.
 *
 * The operands are widened to int64_t before subtracting: the difference
 * of two int32_t values can be as large as 0xffffffff, which does not fit
 * in int32_t and would otherwise overflow and be sign-extended into the
 * 64-bit result. */
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
{
    int64_t wide_a = (int32_t)a;
    int64_t wide_b = (int32_t)b;

    return (wide_a > wide_b) ? wide_a - wide_b : wide_b - wide_a;
}
#undef DO_ABD
|
1560 | ad69471c | pbrook | |
/* Widening multiply.  Named type is the source type.
 * Operands are converted to type1, multiplied as type2, and the product
 * (truncated to type2) is stored in dest. */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 mull_lhs = (x); \
    type1 mull_rhs = (y); \
    type2 mull_prod = (type2)mull_lhs * (type2)mull_rhs; \
    dest = mull_prod; \
    } while(0)
1567 | ad69471c | pbrook | |
1568 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) |
1569 | ad69471c | pbrook | { |
1570 | ad69471c | pbrook | uint64_t tmp; |
1571 | ad69471c | pbrook | uint64_t result; |
1572 | ad69471c | pbrook | |
1573 | ad69471c | pbrook | DO_MULL(result, a, b, uint8_t, uint16_t); |
1574 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); |
1575 | ad69471c | pbrook | result |= tmp << 16;
|
1576 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); |
1577 | ad69471c | pbrook | result |= tmp << 32;
|
1578 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); |
1579 | ad69471c | pbrook | result |= tmp << 48;
|
1580 | ad69471c | pbrook | return result;
|
1581 | ad69471c | pbrook | } |
1582 | ad69471c | pbrook | |
/* Multiply four signed byte lanes of a and b, each product placed in its
 * own 16-bit lane of the result.
 *
 * The multiply is done on int operands rather than uint16_t ones: two
 * uint16_t values promote to int, and 0xffff * 0xffff (i.e. -1 * -1 after
 * conversion) overflows a 32-bit int, which is undefined behaviour.  An
 * int8_t by int8_t product always fits in int, and truncating it to
 * uint16_t gives the same 16-bit pattern. */
uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int8_t lhs = (int8_t)(a >> (lane * 8));
        int8_t rhs = (int8_t)(b >> (lane * 8));
        uint16_t product = (uint16_t)((int)lhs * (int)rhs);

        result |= (uint64_t)product << (lane * 16);
    }
    return result;
}
1597 | ad69471c | pbrook | |
1598 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) |
1599 | ad69471c | pbrook | { |
1600 | ad69471c | pbrook | uint64_t tmp; |
1601 | ad69471c | pbrook | uint64_t result; |
1602 | ad69471c | pbrook | |
1603 | ad69471c | pbrook | DO_MULL(result, a, b, uint16_t, uint32_t); |
1604 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1605 | ad69471c | pbrook | return result | (tmp << 32); |
1606 | ad69471c | pbrook | } |
1607 | ad69471c | pbrook | |
1608 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) |
1609 | ad69471c | pbrook | { |
1610 | ad69471c | pbrook | uint64_t tmp; |
1611 | ad69471c | pbrook | uint64_t result; |
1612 | ad69471c | pbrook | |
1613 | ad69471c | pbrook | DO_MULL(result, a, b, int16_t, uint32_t); |
1614 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); |
1615 | ad69471c | pbrook | return result | (tmp << 32); |
1616 | ad69471c | pbrook | } |
1617 | ad69471c | pbrook | |
1618 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u16)(uint64_t x) |
1619 | ad69471c | pbrook | { |
1620 | ad69471c | pbrook | uint16_t tmp; |
1621 | ad69471c | pbrook | uint64_t result; |
1622 | ad69471c | pbrook | result = (uint16_t)-x; |
1623 | ad69471c | pbrook | tmp = -(x >> 16);
|
1624 | ad69471c | pbrook | result |= (uint64_t)tmp << 16;
|
1625 | ad69471c | pbrook | tmp = -(x >> 32);
|
1626 | ad69471c | pbrook | result |= (uint64_t)tmp << 32;
|
1627 | ad69471c | pbrook | tmp = -(x >> 48);
|
1628 | ad69471c | pbrook | result |= (uint64_t)tmp << 48;
|
1629 | ad69471c | pbrook | return result;
|
1630 | ad69471c | pbrook | } |
1631 | ad69471c | pbrook | |
1632 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u32)(uint64_t x) |
1633 | ad69471c | pbrook | { |
1634 | ad69471c | pbrook | uint32_t low = -x; |
1635 | ad69471c | pbrook | uint32_t high = -(x >> 32);
|
1636 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1637 | ad69471c | pbrook | } |
1638 | ad69471c | pbrook | |
1639 | ad69471c | pbrook | /* FIXME: There should be a native op for this. */
|
1640 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u64)(uint64_t x) |
1641 | ad69471c | pbrook | { |
1642 | ad69471c | pbrook | return -x;
|
1643 | ad69471c | pbrook | } |
1644 | ad69471c | pbrook | |
1645 | ad69471c | pbrook | /* Saturating sign manipulation. */
|
1646 | ad69471c | pbrook | /* ??? Make these use NEON_VOP1 */
|
1647 | ad69471c | pbrook | #define DO_QABS8(x) do { \ |
1648 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1649 | ad69471c | pbrook | x = 0x7f; \
|
1650 | ad69471c | pbrook | SET_QC(); \ |
1651 | ad69471c | pbrook | } else if (x < 0) { \ |
1652 | ad69471c | pbrook | x = -x; \ |
1653 | ad69471c | pbrook | }} while (0) |
1654 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) |
1655 | ad69471c | pbrook | { |
1656 | ad69471c | pbrook | neon_s8 vec; |
1657 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1658 | ad69471c | pbrook | DO_QABS8(vec.v1); |
1659 | ad69471c | pbrook | DO_QABS8(vec.v2); |
1660 | ad69471c | pbrook | DO_QABS8(vec.v3); |
1661 | ad69471c | pbrook | DO_QABS8(vec.v4); |
1662 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1663 | ad69471c | pbrook | return x;
|
1664 | ad69471c | pbrook | } |
1665 | ad69471c | pbrook | #undef DO_QABS8
|
1666 | ad69471c | pbrook | |
1667 | ad69471c | pbrook | #define DO_QNEG8(x) do { \ |
1668 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1669 | ad69471c | pbrook | x = 0x7f; \
|
1670 | ad69471c | pbrook | SET_QC(); \ |
1671 | ad69471c | pbrook | } else { \
|
1672 | ad69471c | pbrook | x = -x; \ |
1673 | ad69471c | pbrook | }} while (0) |
1674 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) |
1675 | ad69471c | pbrook | { |
1676 | ad69471c | pbrook | neon_s8 vec; |
1677 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1678 | ad69471c | pbrook | DO_QNEG8(vec.v1); |
1679 | ad69471c | pbrook | DO_QNEG8(vec.v2); |
1680 | ad69471c | pbrook | DO_QNEG8(vec.v3); |
1681 | ad69471c | pbrook | DO_QNEG8(vec.v4); |
1682 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1683 | ad69471c | pbrook | return x;
|
1684 | ad69471c | pbrook | } |
1685 | ad69471c | pbrook | #undef DO_QNEG8
|
1686 | ad69471c | pbrook | |
1687 | ad69471c | pbrook | #define DO_QABS16(x) do { \ |
1688 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1689 | ad69471c | pbrook | x = 0x7fff; \
|
1690 | ad69471c | pbrook | SET_QC(); \ |
1691 | ad69471c | pbrook | } else if (x < 0) { \ |
1692 | ad69471c | pbrook | x = -x; \ |
1693 | ad69471c | pbrook | }} while (0) |
1694 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) |
1695 | ad69471c | pbrook | { |
1696 | ad69471c | pbrook | neon_s16 vec; |
1697 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1698 | ad69471c | pbrook | DO_QABS16(vec.v1); |
1699 | ad69471c | pbrook | DO_QABS16(vec.v2); |
1700 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1701 | ad69471c | pbrook | return x;
|
1702 | ad69471c | pbrook | } |
1703 | ad69471c | pbrook | #undef DO_QABS16
|
1704 | ad69471c | pbrook | |
1705 | ad69471c | pbrook | #define DO_QNEG16(x) do { \ |
1706 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1707 | ad69471c | pbrook | x = 0x7fff; \
|
1708 | ad69471c | pbrook | SET_QC(); \ |
1709 | ad69471c | pbrook | } else { \
|
1710 | ad69471c | pbrook | x = -x; \ |
1711 | ad69471c | pbrook | }} while (0) |
1712 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) |
1713 | ad69471c | pbrook | { |
1714 | ad69471c | pbrook | neon_s16 vec; |
1715 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1716 | ad69471c | pbrook | DO_QNEG16(vec.v1); |
1717 | ad69471c | pbrook | DO_QNEG16(vec.v2); |
1718 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1719 | ad69471c | pbrook | return x;
|
1720 | ad69471c | pbrook | } |
1721 | ad69471c | pbrook | #undef DO_QNEG16
|
1722 | ad69471c | pbrook | |
1723 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) |
1724 | ad69471c | pbrook | { |
1725 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1726 | ad69471c | pbrook | SET_QC(); |
1727 | ad69471c | pbrook | x = ~SIGNBIT; |
1728 | ad69471c | pbrook | } else if ((int32_t)x < 0) { |
1729 | ad69471c | pbrook | x = -x; |
1730 | ad69471c | pbrook | } |
1731 | ad69471c | pbrook | return x;
|
1732 | ad69471c | pbrook | } |
1733 | ad69471c | pbrook | |
1734 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) |
1735 | ad69471c | pbrook | { |
1736 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1737 | ad69471c | pbrook | SET_QC(); |
1738 | ad69471c | pbrook | x = ~SIGNBIT; |
1739 | ad69471c | pbrook | } else {
|
1740 | ad69471c | pbrook | x = -x; |
1741 | ad69471c | pbrook | } |
1742 | ad69471c | pbrook | return x;
|
1743 | ad69471c | pbrook | } |
1744 | ad69471c | pbrook | |
1745 | ad69471c | pbrook | /* NEON Float helpers. */
|
1746 | ad69471c | pbrook | uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) |
1747 | ad69471c | pbrook | { |
1748 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1749 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1750 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; |
1751 | ad69471c | pbrook | } |
1752 | ad69471c | pbrook | |
1753 | ad69471c | pbrook | uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) |
1754 | ad69471c | pbrook | { |
1755 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1756 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1757 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; |
1758 | ad69471c | pbrook | } |
1759 | ad69471c | pbrook | |
1760 | ad69471c | pbrook | uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) |
1761 | ad69471c | pbrook | { |
1762 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1763 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1764 | ad69471c | pbrook | return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) |
1765 | ad69471c | pbrook | ? float32_sub(f0, f1, NFS) |
1766 | ad69471c | pbrook | : float32_sub(f1, f0, NFS)); |
1767 | ad69471c | pbrook | } |
1768 | ad69471c | pbrook | |
1769 | ad69471c | pbrook | uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) |
1770 | ad69471c | pbrook | { |
1771 | ad69471c | pbrook | return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
|
1772 | ad69471c | pbrook | } |
1773 | ad69471c | pbrook | |
1774 | ad69471c | pbrook | uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) |
1775 | ad69471c | pbrook | { |
1776 | ad69471c | pbrook | return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
|
1777 | ad69471c | pbrook | } |
1778 | ad69471c | pbrook | |
1779 | ad69471c | pbrook | uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) |
1780 | ad69471c | pbrook | { |
1781 | ad69471c | pbrook | return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
|
1782 | ad69471c | pbrook | } |
1783 | ad69471c | pbrook | |
1784 | ad69471c | pbrook | /* Floating point comparisons produce an integer result. */
|
1785 | ad69471c | pbrook | #define NEON_VOP_FCMP(name, cmp) \
|
1786 | ad69471c | pbrook | uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ |
1787 | ad69471c | pbrook | { \ |
1788 | ad69471c | pbrook | if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ |
1789 | ad69471c | pbrook | return ~0; \ |
1790 | ad69471c | pbrook | else \
|
1791 | ad69471c | pbrook | return 0; \ |
1792 | ad69471c | pbrook | } |
1793 | ad69471c | pbrook | |
1794 | ad69471c | pbrook | NEON_VOP_FCMP(ceq_f32, ==) |
1795 | ad69471c | pbrook | NEON_VOP_FCMP(cge_f32, >=) |
1796 | ad69471c | pbrook | NEON_VOP_FCMP(cgt_f32, >) |
1797 | ad69471c | pbrook | |
1798 | ad69471c | pbrook | uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) |
1799 | ad69471c | pbrook | { |
1800 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1801 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1802 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; |
1803 | ad69471c | pbrook | } |
1804 | ad69471c | pbrook | |
1805 | ad69471c | pbrook | uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) |
1806 | ad69471c | pbrook | { |
1807 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1808 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1809 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; |
1810 | ad69471c | pbrook | } |
1811 | 02acedf9 | Peter Maydell | |
/* Extract element number `idx` of width `bits` bits from the value `vec`. */
#define ELEM(vec, idx, bits) \
    (((vec) >> ((idx) * (bits))) & ((1ull << (bits)) - 1))
1813 | 02acedf9 | Peter Maydell | |
1814 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1815 | 02acedf9 | Peter Maydell | { |
1816 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1817 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1818 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1819 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1820 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) |
1821 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) |
1822 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) |
1823 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); |
1824 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) |
1825 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) |
1826 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1827 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); |
1828 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) |
1829 | 02acedf9 | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) |
1830 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) |
1831 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); |
1832 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) |
1833 | 02acedf9 | Peter Maydell | | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) |
1834 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) |
1835 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1836 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1837 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1838 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1839 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1840 | 02acedf9 | Peter Maydell | } |
1841 | 02acedf9 | Peter Maydell | |
1842 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1843 | 02acedf9 | Peter Maydell | { |
1844 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1845 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1846 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1847 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1848 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) |
1849 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); |
1850 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) |
1851 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); |
1852 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) |
1853 | 02acedf9 | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); |
1854 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) |
1855 | 02acedf9 | Peter Maydell | | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1856 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1857 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1858 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1859 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1860 | 02acedf9 | Peter Maydell | } |
1861 | 02acedf9 | Peter Maydell | |
1862 | 02acedf9 | Peter Maydell | void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
1863 | 02acedf9 | Peter Maydell | { |
1864 | 02acedf9 | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1865 | 02acedf9 | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1866 | 02acedf9 | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1867 | 02acedf9 | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1868 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); |
1869 | 02acedf9 | Peter Maydell | uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1870 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); |
1871 | 02acedf9 | Peter Maydell | uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1872 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1873 | 02acedf9 | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1874 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1875 | 02acedf9 | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1876 | 02acedf9 | Peter Maydell | } |
1877 | 02acedf9 | Peter Maydell | |
1878 | 02acedf9 | Peter Maydell | void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1879 | 02acedf9 | Peter Maydell | { |
1880 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1881 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1882 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) |
1883 | 02acedf9 | Peter Maydell | | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) |
1884 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1885 | 02acedf9 | Peter Maydell | | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); |
1886 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) |
1887 | 02acedf9 | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) |
1888 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) |
1889 | 02acedf9 | Peter Maydell | | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
1890 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1891 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1892 | 02acedf9 | Peter Maydell | } |
1893 | 02acedf9 | Peter Maydell | |
1894 | 02acedf9 | Peter Maydell | void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1895 | 02acedf9 | Peter Maydell | { |
1896 | 02acedf9 | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1897 | 02acedf9 | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1898 | 02acedf9 | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) |
1899 | 02acedf9 | Peter Maydell | | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); |
1900 | 02acedf9 | Peter Maydell | uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) |
1901 | 02acedf9 | Peter Maydell | | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
1902 | 02acedf9 | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1903 | 02acedf9 | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1904 | 02acedf9 | Peter Maydell | } |
1905 | d68a6f3a | Peter Maydell | |
1906 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1907 | d68a6f3a | Peter Maydell | { |
1908 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1909 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1910 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1911 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1912 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) |
1913 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) |
1914 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) |
1915 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); |
1916 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) |
1917 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) |
1918 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) |
1919 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); |
1920 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) |
1921 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) |
1922 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) |
1923 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); |
1924 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) |
1925 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) |
1926 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) |
1927 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); |
1928 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1929 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1930 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1931 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1932 | d68a6f3a | Peter Maydell | } |
1933 | d68a6f3a | Peter Maydell | |
1934 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1935 | d68a6f3a | Peter Maydell | { |
1936 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1937 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1938 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1939 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1940 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) |
1941 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); |
1942 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) |
1943 | d68a6f3a | Peter Maydell | | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); |
1944 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) |
1945 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); |
1946 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) |
1947 | d68a6f3a | Peter Maydell | | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); |
1948 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1949 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1950 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1951 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1952 | d68a6f3a | Peter Maydell | } |
1953 | d68a6f3a | Peter Maydell | |
1954 | d68a6f3a | Peter Maydell | void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
|
1955 | d68a6f3a | Peter Maydell | { |
1956 | d68a6f3a | Peter Maydell | uint64_t zm0 = float64_val(env->vfp.regs[rm]); |
1957 | d68a6f3a | Peter Maydell | uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
|
1958 | d68a6f3a | Peter Maydell | uint64_t zd0 = float64_val(env->vfp.regs[rd]); |
1959 | d68a6f3a | Peter Maydell | uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
|
1960 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); |
1961 | d68a6f3a | Peter Maydell | uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); |
1962 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); |
1963 | d68a6f3a | Peter Maydell | uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); |
1964 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1965 | d68a6f3a | Peter Maydell | env->vfp.regs[rm + 1] = make_float64(m1);
|
1966 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1967 | d68a6f3a | Peter Maydell | env->vfp.regs[rd + 1] = make_float64(d1);
|
1968 | d68a6f3a | Peter Maydell | } |
1969 | d68a6f3a | Peter Maydell | |
1970 | d68a6f3a | Peter Maydell | void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
|
1971 | d68a6f3a | Peter Maydell | { |
1972 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1973 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1974 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) |
1975 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) |
1976 | d68a6f3a | Peter Maydell | | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) |
1977 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); |
1978 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) |
1979 | d68a6f3a | Peter Maydell | | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) |
1980 | d68a6f3a | Peter Maydell | | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) |
1981 | d68a6f3a | Peter Maydell | | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); |
1982 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1983 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1984 | d68a6f3a | Peter Maydell | } |
1985 | d68a6f3a | Peter Maydell | |
1986 | d68a6f3a | Peter Maydell | void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
1987 | d68a6f3a | Peter Maydell | { |
1988 | d68a6f3a | Peter Maydell | uint64_t zm = float64_val(env->vfp.regs[rm]); |
1989 | d68a6f3a | Peter Maydell | uint64_t zd = float64_val(env->vfp.regs[rd]); |
1990 | d68a6f3a | Peter Maydell | uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) |
1991 | d68a6f3a | Peter Maydell | | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); |
1992 | d68a6f3a | Peter Maydell | uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) |
1993 | d68a6f3a | Peter Maydell | | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); |
1994 | d68a6f3a | Peter Maydell | env->vfp.regs[rm] = make_float64(m0); |
1995 | d68a6f3a | Peter Maydell | env->vfp.regs[rd] = make_float64(d0); |
1996 | d68a6f3a | Peter Maydell | } |