Statistics
| Branch: | Revision:

root / target-arm / neon_helper.c @ f8bf8606

History | View | Annotate | Download (52.8 kB)

1 e677137d pbrook
/*
2 e677137d pbrook
 * ARM NEON vector operations.
3 e677137d pbrook
 *
4 e677137d pbrook
 * Copyright (c) 2007, 2008 CodeSourcery.
5 e677137d pbrook
 * Written by Paul Brook
6 e677137d pbrook
 *
7 e677137d pbrook
 * This code is licensed under the GNU GPL v2.
8 e677137d pbrook
 */
9 ad69471c pbrook
#include <stdlib.h>
10 ad69471c pbrook
#include <stdio.h>
11 ad69471c pbrook
12 ad69471c pbrook
#include "cpu.h"
13 ad69471c pbrook
#include "exec-all.h"
14 ad69471c pbrook
#include "helpers.h"
15 ad69471c pbrook
16 ad69471c pbrook
/* Sign-bit masks for 32-bit and 64-bit values.  Both expansions are fully
   parenthesized so that each behaves as a single operand wherever it is
   substituted.  */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)
18 ad69471c pbrook
19 ad69471c pbrook
/* Record that a saturation occurred by setting the CPSR_Q bit in the FPSCR
   slot of env->vfp.xregs.  The bit is OR-ed in so that every other bit
   already stored in that word is preserved.  A variable named env must be
   in scope at the point of use.  */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
20 ad69471c pbrook
21 ad69471c pbrook
/* File-local floating-point status object, and a shorthand for its address.
   The expansion is parenthesized so that NFS can be followed by a postfix
   operator (for example NFS->field) without changing its meaning.  */
static float_status neon_float_status;
#define NFS (&neon_float_status)
23 ad69471c pbrook
24 ad69471c pbrook
/* Helper routines to perform bitwise copies between float and int.  */
25 ad69471c pbrook
static inline float32 vfp_itos(uint32_t i)
26 ad69471c pbrook
{
27 ad69471c pbrook
    union {
28 ad69471c pbrook
        uint32_t i;
29 ad69471c pbrook
        float32 s;
30 ad69471c pbrook
    } v;
31 ad69471c pbrook
32 ad69471c pbrook
    v.i = i;
33 ad69471c pbrook
    return v.s;
34 ad69471c pbrook
}
35 ad69471c pbrook
36 ad69471c pbrook
/* Return the raw 32-bit pattern stored in the float32 value s.  No numeric
   conversion takes place; the value is passed through a union.  */
static inline uint32_t vfp_stoi(float32 s)
{
    union {
        float32 as_float;
        uint32_t as_bits;
    } pun;

    pun.as_float = s;
    return pun.as_bits;
}
46 ad69471c pbrook
47 ad69471c pbrook
/* Structure types that overlay one, two or four equally sized elements
   (members v1..v4) on a 32-bit word.  When HOST_WORDS_BIGENDIAN is defined
   the members are declared in reverse order; presumably this keeps v1 on
   the least significant element regardless of host byte order.  */
#define NEON_TYPE1(name, type) \
typedef struct { type v1; } neon_##name;
#ifdef HOST_WORDS_BIGENDIAN
#define NEON_TYPE2(name, type) \
typedef struct { type v2; type v1; } neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct { type v4; type v3; type v2; type v1; } neon_##name;
#else
#define NEON_TYPE2(name, type) \
typedef struct { type v1; type v2; } neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct { type v1; type v2; type v3; type v4; } neon_##name;
#endif

/* Four 8-bit lanes.  */
NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
/* Two 16-bit lanes.  */
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
/* One 32-bit lane.  */
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)

/* The generator macros are not needed past this point.  */
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
93 ad69471c pbrook
94 ad69471c pbrook
/* Copy from a uint32_t to a vector structure type.  The word val is written
   to one member of a local union and read back through the vtype member,
   so dest receives the same bytes.  */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t i; \
        vtype v; \
    } conv_u; \
    conv_u.i = (val); \
    dest = conv_u.v; \
    } while (0)
103 ad69471c pbrook
104 ad69471c pbrook
/* Copy from a vector structure type to a uint32_t.  The structure val is
   written to one member of a local union and read back through the
   uint32_t member, so dest receives the same bytes.  */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t i; \
        vtype v; \
    } conv_u; \
    conv_u.v = (val); \
    dest = conv_u.i; \
    } while (0)
113 ad69471c pbrook
114 ad69471c pbrook
/* Apply the currently defined NEON_FN to each lane of vsrc1/vsrc2, storing
   into the matching lane of vdest.  The wider variants are built from the
   narrower ones, so lanes are processed in the order v1, v2, v3, v4.  */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
124 ad69471c pbrook
125 ad69471c pbrook
/* Function body shared by the two-operand helpers.  arg1 and arg2 (the
   parameters of the enclosing helper) are unpacked into vtype structures,
   NEON_DO<n> applies the current NEON_FN to each lane, and the resulting
   structure is packed back into the 32-bit return value.  */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}

/* Define helper neon_<name> taking just the two packed operands.  */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* As NEON_VOP, but the helper also receives env as its first parameter,
   for NEON_FN bodies that refer to env (for example through SET_QC).  */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
145 ad69471c pbrook
146 ad69471c pbrook
/* Pairwise operations.  */
/* For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same.  */
/* NEON_FN is applied to adjacent lanes of the same operand: the pairs taken
   from vsrc1 fill the first lanes of vdest and the pairs taken from vsrc2
   fill the remaining lanes.  */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);
157 ad69471c pbrook
158 ad69471c pbrook
/* Define pairwise helper neon_<name>: unpack both packed operands into
   vtype structures, let NEON_PDO<n> apply the current NEON_FN to adjacent
   lanes, and return the repacked result.  */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
171 ad69471c pbrook
172 ad69471c pbrook
/* Unary operators.  */
/* Define helper neon_<name> taking one packed operand.  NEON_DO<n> still
   passes vsrc2 lanes as the third NEON_FN argument although no vsrc2 is
   declared here, so the NEON_FN in effect when this macro is instantiated
   must not expand its third parameter.  The parameter arg is reused to
   hold the packed result.  */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1, vdest; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, arg, vdest); \
    return arg; \
}
183 ad69471c pbrook
184 ad69471c pbrook
185 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
186 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
187 ad69471c pbrook
    if (tmp != (type)tmp) { \
188 ad69471c pbrook
        SET_QC(); \
189 ad69471c pbrook
        dest = ~0; \
190 ad69471c pbrook
    } else { \
191 ad69471c pbrook
        dest = tmp; \
192 ad69471c pbrook
    }} while(0)
193 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
194 ad69471c pbrook
NEON_VOP_ENV(qadd_u8, neon_u8, 4)
195 ad69471c pbrook
#undef NEON_FN
196 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
197 ad69471c pbrook
NEON_VOP_ENV(qadd_u16, neon_u16, 2)
198 ad69471c pbrook
#undef NEON_FN
199 ad69471c pbrook
#undef NEON_USAT
200 ad69471c pbrook
201 72902672 Christophe Lyon
/* Unsigned saturating 32-bit add.  Returns a + b, or all ones (after
   invoking SET_QC()) when the addition wrapped around.  */
uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;

    /* An unsigned sum smaller than an operand means it wrapped.  */
    if (sum >= a) {
        return sum;
    }
    SET_QC();
    return ~(uint32_t)0;
}
210 72902672 Christophe Lyon
211 72902672 Christophe Lyon
/* Unsigned saturating 64-bit add.  Returns src1 + src2, or all ones (after
   invoking SET_QC()) when the addition wrapped around.  */
uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;

    /* An unsigned sum smaller than an operand means it wrapped.  */
    if (sum >= src1) {
        return sum;
    }
    SET_QC();
    return ~(uint64_t)0;
}
222 72902672 Christophe Lyon
223 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
224 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
225 ad69471c pbrook
    if (tmp != (type)tmp) { \
226 ad69471c pbrook
        SET_QC(); \
227 ad69471c pbrook
        if (src2 > 0) { \
228 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
229 ad69471c pbrook
        } else { \
230 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
231 ad69471c pbrook
        } \
232 ad69471c pbrook
    } \
233 ad69471c pbrook
    dest = tmp; \
234 ad69471c pbrook
    } while(0)
235 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
236 ad69471c pbrook
NEON_VOP_ENV(qadd_s8, neon_s8, 4)
237 ad69471c pbrook
#undef NEON_FN
238 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
239 ad69471c pbrook
NEON_VOP_ENV(qadd_s16, neon_s16, 2)
240 ad69471c pbrook
#undef NEON_FN
241 ad69471c pbrook
#undef NEON_SSAT
242 ad69471c pbrook
243 72902672 Christophe Lyon
/* Signed saturating 32-bit add on operands passed as raw bit patterns.
   Overflow is detected when both operands have the same sign bit and the
   sum's sign bit differs from it; the result is then 0x7fffffff for a
   non-negative a and 0x80000000 for a negative a, and SET_QC() is
   invoked.  */
uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;
    uint32_t same_sign = ~(a ^ b) & SIGNBIT;
    uint32_t sign_flipped = (sum ^ a) & SIGNBIT;

    if (sign_flipped && same_sign) {
        SET_QC();
        sum = (a & SIGNBIT) ? SIGNBIT : ~SIGNBIT;
    }
    return sum;
}
252 72902672 Christophe Lyon
253 72902672 Christophe Lyon
/* Signed saturating 64-bit add on operands passed as raw bit patterns.
   Overflow is detected when both operands have the same sign bit and the
   sum's sign bit differs from it; the result is then the largest positive
   pattern for a non-negative src1 and the sign-bit pattern for a negative
   src1, and SET_QC() is invoked.  */
uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;
    uint64_t same_sign = ~(src1 ^ src2) & SIGNBIT64;
    uint64_t sign_flipped = (sum ^ src1) & SIGNBIT64;

    if (sign_flipped && same_sign) {
        SET_QC();
        sum = (src1 & SIGNBIT64) ? SIGNBIT64 : ~SIGNBIT64;
    }
    return sum;
}
264 72902672 Christophe Lyon
265 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
266 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
267 ad69471c pbrook
    if (tmp != (type)tmp) { \
268 ad69471c pbrook
        SET_QC(); \
269 ad69471c pbrook
        dest = 0; \
270 ad69471c pbrook
    } else { \
271 ad69471c pbrook
        dest = tmp; \
272 ad69471c pbrook
    }} while(0)
273 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
274 ad69471c pbrook
NEON_VOP_ENV(qsub_u8, neon_u8, 4)
275 ad69471c pbrook
#undef NEON_FN
276 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
277 ad69471c pbrook
NEON_VOP_ENV(qsub_u16, neon_u16, 2)
278 ad69471c pbrook
#undef NEON_FN
279 ad69471c pbrook
#undef NEON_USAT
280 ad69471c pbrook
281 72902672 Christophe Lyon
/* Unsigned saturating 32-bit subtract.  Returns a - b, or zero (after
   invoking SET_QC()) when b is larger than a.  */
uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b)
{
    /* The unsigned difference wraps exactly when b exceeds a.  */
    if (b > a) {
        SET_QC();
        return 0;
    }
    return a - b;
}
290 72902672 Christophe Lyon
291 72902672 Christophe Lyon
/* Unsigned saturating 64-bit subtract.  Returns src1 - src2, or zero
   (after invoking SET_QC()) when src2 is larger than src1.  */
uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2)
{
    uint64_t diff = src1 - src2;

    /* A difference larger than the minuend means the subtraction wrapped,
       which happens exactly when src1 < src2.  */
    if (diff > src1) {
        SET_QC();
        diff = 0;
    }
    return diff;
}
303 72902672 Christophe Lyon
304 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
305 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
306 ad69471c pbrook
    if (tmp != (type)tmp) { \
307 ad69471c pbrook
        SET_QC(); \
308 ad69471c pbrook
        if (src2 < 0) { \
309 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
310 ad69471c pbrook
        } else { \
311 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
312 ad69471c pbrook
        } \
313 ad69471c pbrook
    } \
314 ad69471c pbrook
    dest = tmp; \
315 ad69471c pbrook
    } while(0)
316 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
317 ad69471c pbrook
NEON_VOP_ENV(qsub_s8, neon_s8, 4)
318 ad69471c pbrook
#undef NEON_FN
319 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
320 ad69471c pbrook
NEON_VOP_ENV(qsub_s16, neon_s16, 2)
321 ad69471c pbrook
#undef NEON_FN
322 ad69471c pbrook
#undef NEON_SSAT
323 ad69471c pbrook
324 72902672 Christophe Lyon
/* Signed saturating 32-bit subtract on operands passed as raw bit
   patterns.  Overflow is detected when the operands have different sign
   bits and the difference's sign bit differs from that of a; the result is
   then 0x7fffffff for a non-negative a and 0x80000000 for a negative a,
   and SET_QC() is invoked.  */
uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b)
{
    uint32_t diff = a - b;
    uint32_t signs_differ = (a ^ b) & SIGNBIT;
    uint32_t sign_flipped = (diff ^ a) & SIGNBIT;

    if (sign_flipped && signs_differ) {
        SET_QC();
        diff = (a & SIGNBIT) ? SIGNBIT : ~SIGNBIT;
    }
    return diff;
}
333 72902672 Christophe Lyon
334 72902672 Christophe Lyon
/* Signed saturating 64-bit subtract on operands passed as raw bit
   patterns.  Overflow is detected when the operands have different sign
   bits and the difference's sign bit differs from that of src1; the result
   is then the largest positive pattern for a non-negative src1 and the
   sign-bit pattern for a negative src1, and SET_QC() is invoked.  */
uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2)
{
    uint64_t diff = src1 - src2;
    uint64_t signs_differ = (src1 ^ src2) & SIGNBIT64;
    uint64_t sign_flipped = (diff ^ src1) & SIGNBIT64;

    if (sign_flipped && signs_differ) {
        SET_QC();
        diff = (src1 & SIGNBIT64) ? SIGNBIT64 : ~SIGNBIT64;
    }
    return diff;
}
345 72902672 Christophe Lyon
346 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
347 ad69471c pbrook
NEON_VOP(hadd_s8, neon_s8, 4)
348 ad69471c pbrook
NEON_VOP(hadd_u8, neon_u8, 4)
349 ad69471c pbrook
NEON_VOP(hadd_s16, neon_s16, 2)
350 ad69471c pbrook
NEON_VOP(hadd_u16, neon_u16, 2)
351 ad69471c pbrook
#undef NEON_FN
352 ad69471c pbrook
353 ad69471c pbrook
int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2)
354 ad69471c pbrook
{
355 ad69471c pbrook
    int32_t dest;
356 ad69471c pbrook
357 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
358 ad69471c pbrook
    if (src1 & src2 & 1)
359 ad69471c pbrook
        dest++;
360 ad69471c pbrook
    return dest;
361 ad69471c pbrook
}
362 ad69471c pbrook
363 ad69471c pbrook
uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2)
364 ad69471c pbrook
{
365 ad69471c pbrook
    uint32_t dest;
366 ad69471c pbrook
367 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
368 ad69471c pbrook
    if (src1 & src2 & 1)
369 ad69471c pbrook
        dest++;
370 ad69471c pbrook
    return dest;
371 ad69471c pbrook
}
372 ad69471c pbrook
373 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1
374 ad69471c pbrook
NEON_VOP(rhadd_s8, neon_s8, 4)
375 ad69471c pbrook
NEON_VOP(rhadd_u8, neon_u8, 4)
376 ad69471c pbrook
NEON_VOP(rhadd_s16, neon_s16, 2)
377 ad69471c pbrook
NEON_VOP(rhadd_u16, neon_u16, 2)
378 ad69471c pbrook
#undef NEON_FN
379 ad69471c pbrook
380 ad69471c pbrook
int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2)
381 ad69471c pbrook
{
382 ad69471c pbrook
    int32_t dest;
383 ad69471c pbrook
384 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
385 ad69471c pbrook
    if ((src1 | src2) & 1)
386 ad69471c pbrook
        dest++;
387 ad69471c pbrook
    return dest;
388 ad69471c pbrook
}
389 ad69471c pbrook
390 ad69471c pbrook
uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2)
391 ad69471c pbrook
{
392 ad69471c pbrook
    uint32_t dest;
393 ad69471c pbrook
394 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
395 ad69471c pbrook
    if ((src1 | src2) & 1)
396 ad69471c pbrook
        dest++;
397 ad69471c pbrook
    return dest;
398 ad69471c pbrook
}
399 ad69471c pbrook
400 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1
401 ad69471c pbrook
NEON_VOP(hsub_s8, neon_s8, 4)
402 ad69471c pbrook
NEON_VOP(hsub_u8, neon_u8, 4)
403 ad69471c pbrook
NEON_VOP(hsub_s16, neon_s16, 2)
404 ad69471c pbrook
NEON_VOP(hsub_u16, neon_u16, 2)
405 ad69471c pbrook
#undef NEON_FN
406 ad69471c pbrook
407 ad69471c pbrook
int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2)
408 ad69471c pbrook
{
409 ad69471c pbrook
    int32_t dest;
410 ad69471c pbrook
411 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
412 ad69471c pbrook
    if ((~src1) & src2 & 1)
413 ad69471c pbrook
        dest--;
414 ad69471c pbrook
    return dest;
415 ad69471c pbrook
}
416 ad69471c pbrook
417 ad69471c pbrook
uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
418 ad69471c pbrook
{
419 ad69471c pbrook
    uint32_t dest;
420 ad69471c pbrook
421 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
422 ad69471c pbrook
    if ((~src1) & src2 & 1)
423 ad69471c pbrook
        dest--;
424 ad69471c pbrook
    return dest;
425 ad69471c pbrook
}
426 ad69471c pbrook
427 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
428 ad69471c pbrook
NEON_VOP(cgt_s8, neon_s8, 4)
429 ad69471c pbrook
NEON_VOP(cgt_u8, neon_u8, 4)
430 ad69471c pbrook
NEON_VOP(cgt_s16, neon_s16, 2)
431 ad69471c pbrook
NEON_VOP(cgt_u16, neon_u16, 2)
432 ad69471c pbrook
NEON_VOP(cgt_s32, neon_s32, 1)
433 ad69471c pbrook
NEON_VOP(cgt_u32, neon_u32, 1)
434 ad69471c pbrook
#undef NEON_FN
435 ad69471c pbrook
436 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
437 ad69471c pbrook
NEON_VOP(cge_s8, neon_s8, 4)
438 ad69471c pbrook
NEON_VOP(cge_u8, neon_u8, 4)
439 ad69471c pbrook
NEON_VOP(cge_s16, neon_s16, 2)
440 ad69471c pbrook
NEON_VOP(cge_u16, neon_u16, 2)
441 ad69471c pbrook
NEON_VOP(cge_s32, neon_s32, 1)
442 ad69471c pbrook
NEON_VOP(cge_u32, neon_u32, 1)
443 ad69471c pbrook
#undef NEON_FN
444 ad69471c pbrook
445 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
446 ad69471c pbrook
NEON_VOP(min_s8, neon_s8, 4)
447 ad69471c pbrook
NEON_VOP(min_u8, neon_u8, 4)
448 ad69471c pbrook
NEON_VOP(min_s16, neon_s16, 2)
449 ad69471c pbrook
NEON_VOP(min_u16, neon_u16, 2)
450 ad69471c pbrook
NEON_VOP(min_s32, neon_s32, 1)
451 ad69471c pbrook
NEON_VOP(min_u32, neon_u32, 1)
452 ad69471c pbrook
NEON_POP(pmin_s8, neon_s8, 4)
453 ad69471c pbrook
NEON_POP(pmin_u8, neon_u8, 4)
454 ad69471c pbrook
NEON_POP(pmin_s16, neon_s16, 2)
455 ad69471c pbrook
NEON_POP(pmin_u16, neon_u16, 2)
456 ad69471c pbrook
#undef NEON_FN
457 ad69471c pbrook
458 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
459 ad69471c pbrook
NEON_VOP(max_s8, neon_s8, 4)
460 ad69471c pbrook
NEON_VOP(max_u8, neon_u8, 4)
461 ad69471c pbrook
NEON_VOP(max_s16, neon_s16, 2)
462 ad69471c pbrook
NEON_VOP(max_u16, neon_u16, 2)
463 ad69471c pbrook
NEON_VOP(max_s32, neon_s32, 1)
464 ad69471c pbrook
NEON_VOP(max_u32, neon_u32, 1)
465 ad69471c pbrook
NEON_POP(pmax_s8, neon_s8, 4)
466 ad69471c pbrook
NEON_POP(pmax_u8, neon_u8, 4)
467 ad69471c pbrook
NEON_POP(pmax_s16, neon_s16, 2)
468 ad69471c pbrook
NEON_POP(pmax_u16, neon_u16, 2)
469 ad69471c pbrook
#undef NEON_FN
470 ad69471c pbrook
471 ad69471c pbrook
#define NEON_FN(dest, src1, src2) \
472 ad69471c pbrook
    dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
473 ad69471c pbrook
NEON_VOP(abd_s8, neon_s8, 4)
474 ad69471c pbrook
NEON_VOP(abd_u8, neon_u8, 4)
475 ad69471c pbrook
NEON_VOP(abd_s16, neon_s16, 2)
476 ad69471c pbrook
NEON_VOP(abd_u16, neon_u16, 2)
477 ad69471c pbrook
NEON_VOP(abd_s32, neon_s32, 1)
478 ad69471c pbrook
NEON_VOP(abd_u32, neon_u32, 1)
479 ad69471c pbrook
#undef NEON_FN
480 ad69471c pbrook
481 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
482 ad69471c pbrook
    int8_t tmp; \
483 ad69471c pbrook
    tmp = (int8_t)src2; \
484 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
485 50f67e95 Juha Riihimรคki
        tmp <= -(ssize_t)sizeof(src1) * 8) { \
486 ad69471c pbrook
        dest = 0; \
487 ad69471c pbrook
    } else if (tmp < 0) { \
488 ad69471c pbrook
        dest = src1 >> -tmp; \
489 ad69471c pbrook
    } else { \
490 ad69471c pbrook
        dest = src1 << tmp; \
491 ad69471c pbrook
    }} while (0)
492 ad69471c pbrook
NEON_VOP(shl_u8, neon_u8, 4)
493 ad69471c pbrook
NEON_VOP(shl_u16, neon_u16, 2)
494 ad69471c pbrook
NEON_VOP(shl_u32, neon_u32, 1)
495 ad69471c pbrook
#undef NEON_FN
496 ad69471c pbrook
497 ad69471c pbrook
uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
498 ad69471c pbrook
{
499 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
500 ad69471c pbrook
    if (shift >= 64 || shift <= -64) {
501 ad69471c pbrook
        val = 0;
502 ad69471c pbrook
    } else if (shift < 0) {
503 ad69471c pbrook
        val >>= -shift;
504 ad69471c pbrook
    } else {
505 ad69471c pbrook
        val <<= shift;
506 ad69471c pbrook
    }
507 ad69471c pbrook
    return val;
508 ad69471c pbrook
}
509 ad69471c pbrook
510 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
511 ad69471c pbrook
    int8_t tmp; \
512 ad69471c pbrook
    tmp = (int8_t)src2; \
513 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
514 ad69471c pbrook
        dest = 0; \
515 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
516 ad69471c pbrook
        dest = src1 >> (sizeof(src1) * 8 - 1); \
517 ad69471c pbrook
    } else if (tmp < 0) { \
518 ad69471c pbrook
        dest = src1 >> -tmp; \
519 ad69471c pbrook
    } else { \
520 ad69471c pbrook
        dest = src1 << tmp; \
521 ad69471c pbrook
    }} while (0)
522 ad69471c pbrook
NEON_VOP(shl_s8, neon_s8, 4)
523 ad69471c pbrook
NEON_VOP(shl_s16, neon_s16, 2)
524 ad69471c pbrook
NEON_VOP(shl_s32, neon_s32, 1)
525 ad69471c pbrook
#undef NEON_FN
526 ad69471c pbrook
527 ad69471c pbrook
uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
528 ad69471c pbrook
{
529 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
530 ad69471c pbrook
    int64_t val = valop;
531 ad69471c pbrook
    if (shift >= 64) {
532 ad69471c pbrook
        val = 0;
533 ad69471c pbrook
    } else if (shift <= -64) {
534 ad69471c pbrook
        val >>= 63;
535 ad69471c pbrook
    } else if (shift < 0) {
536 ad69471c pbrook
        val >>= -shift;
537 ad69471c pbrook
    } else {
538 ad69471c pbrook
        val <<= shift;
539 ad69471c pbrook
    }
540 ad69471c pbrook
    return val;
541 ad69471c pbrook
}
542 ad69471c pbrook
543 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
544 ad69471c pbrook
    int8_t tmp; \
545 ad69471c pbrook
    tmp = (int8_t)src2; \
546 0670a7b6 Peter Maydell
    if ((tmp >= (ssize_t)sizeof(src1) * 8) \
547 0670a7b6 Peter Maydell
        || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \
548 ad69471c pbrook
        dest = 0; \
549 ad69471c pbrook
    } else if (tmp < 0) { \
550 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
551 ad69471c pbrook
    } else { \
552 ad69471c pbrook
        dest = src1 << tmp; \
553 ad69471c pbrook
    }} while (0)
554 ad69471c pbrook
NEON_VOP(rshl_s8, neon_s8, 4)
555 ad69471c pbrook
NEON_VOP(rshl_s16, neon_s16, 2)
556 ad69471c pbrook
#undef NEON_FN
557 ad69471c pbrook
558 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
559 4bd4ee07 Christophe Lyon
 * intermediate 64 bits accumulator.  */
560 4bd4ee07 Christophe Lyon
uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
561 4bd4ee07 Christophe Lyon
{
562 4bd4ee07 Christophe Lyon
    int32_t dest;
563 4bd4ee07 Christophe Lyon
    int32_t val = (int32_t)valop;
564 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
565 4bd4ee07 Christophe Lyon
    if ((shift >= 32) || (shift <= -32)) {
566 4bd4ee07 Christophe Lyon
        dest = 0;
567 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
568 4bd4ee07 Christophe Lyon
        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
569 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
570 4bd4ee07 Christophe Lyon
    } else {
571 4bd4ee07 Christophe Lyon
        dest = val << shift;
572 4bd4ee07 Christophe Lyon
    }
573 4bd4ee07 Christophe Lyon
    return dest;
574 4bd4ee07 Christophe Lyon
}
575 4bd4ee07 Christophe Lyon
576 4bd4ee07 Christophe Lyon
/* Handling addition overflow with 64 bits inputs values is more
577 4bd4ee07 Christophe Lyon
 * tricky than with 32 bits values.  */
578 ad69471c pbrook
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
579 ad69471c pbrook
{
580 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
581 ad69471c pbrook
    int64_t val = valop;
582 0670a7b6 Peter Maydell
    if ((shift >= 64) || (shift <= -64)) {
583 ad69471c pbrook
        val = 0;
584 ad69471c pbrook
    } else if (shift < 0) {
585 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
586 4bd4ee07 Christophe Lyon
        if (val == INT64_MAX) {
587 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
588 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
589 4bd4ee07 Christophe Lyon
             * result directly.  */
590 4bd4ee07 Christophe Lyon
            val = 0x4000000000000000LL;
591 4bd4ee07 Christophe Lyon
        } else {
592 4bd4ee07 Christophe Lyon
            val++;
593 4bd4ee07 Christophe Lyon
            val >>= 1;
594 4bd4ee07 Christophe Lyon
        }
595 ad69471c pbrook
    } else {
596 ad69471c pbrook
        val <<= shift;
597 ad69471c pbrook
    }
598 ad69471c pbrook
    return val;
599 ad69471c pbrook
}
600 ad69471c pbrook
601 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
602 ad69471c pbrook
    int8_t tmp; \
603 ad69471c pbrook
    tmp = (int8_t)src2; \
604 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
605 50f67e95 Juha Riihimรคki
        tmp < -(ssize_t)sizeof(src1) * 8) { \
606 ad69471c pbrook
        dest = 0; \
607 50f67e95 Juha Riihimรคki
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
608 b6c63b98 Christophe Lyon
        dest = src1 >> (-tmp - 1); \
609 ad69471c pbrook
    } else if (tmp < 0) { \
610 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
611 ad69471c pbrook
    } else { \
612 ad69471c pbrook
        dest = src1 << tmp; \
613 ad69471c pbrook
    }} while (0)
614 ad69471c pbrook
NEON_VOP(rshl_u8, neon_u8, 4)
615 ad69471c pbrook
NEON_VOP(rshl_u16, neon_u16, 2)
616 ad69471c pbrook
#undef NEON_FN
617 ad69471c pbrook
618 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
619 4bd4ee07 Christophe Lyon
 * intermediate 64 bits accumulator.  */
620 4bd4ee07 Christophe Lyon
uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
621 4bd4ee07 Christophe Lyon
{
622 4bd4ee07 Christophe Lyon
    uint32_t dest;
623 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
624 4bd4ee07 Christophe Lyon
    if (shift >= 32 || shift < -32) {
625 4bd4ee07 Christophe Lyon
        dest = 0;
626 4bd4ee07 Christophe Lyon
    } else if (shift == -32) {
627 4bd4ee07 Christophe Lyon
        dest = val >> 31;
628 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
629 4bd4ee07 Christophe Lyon
        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
630 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
631 4bd4ee07 Christophe Lyon
    } else {
632 4bd4ee07 Christophe Lyon
        dest = val << shift;
633 4bd4ee07 Christophe Lyon
    }
634 4bd4ee07 Christophe Lyon
    return dest;
635 4bd4ee07 Christophe Lyon
}
636 4bd4ee07 Christophe Lyon
637 4bd4ee07 Christophe Lyon
/* Handling addition overflow with 64 bits inputs values is more
638 4bd4ee07 Christophe Lyon
 * tricky than with 32 bits values.  */
639 ad69471c pbrook
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
640 ad69471c pbrook
{
641 ad69471c pbrook
    int8_t shift = (uint8_t)shiftop;
642 51e3930f Christophe Lyon
    if (shift >= 64 || shift < -64) {
643 ad69471c pbrook
        val = 0;
644 ad69471c pbrook
    } else if (shift == -64) {
645 ad69471c pbrook
        /* Rounding a 1-bit result just preserves that bit.  */
646 ad69471c pbrook
        val >>= 63;
647 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
648 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
649 4bd4ee07 Christophe Lyon
        if (val == UINT64_MAX) {
650 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
651 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
652 4bd4ee07 Christophe Lyon
             * result directly.  */
653 4bd4ee07 Christophe Lyon
            val = 0x8000000000000000ULL;
654 4bd4ee07 Christophe Lyon
        } else {
655 4bd4ee07 Christophe Lyon
            val++;
656 4bd4ee07 Christophe Lyon
            val >>= 1;
657 4bd4ee07 Christophe Lyon
        }
658 ad69471c pbrook
    } else {
659 ad69471c pbrook
        val <<= shift;
660 ad69471c pbrook
    }
661 ad69471c pbrook
    return val;
662 ad69471c pbrook
}
663 ad69471c pbrook
664 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
665 ad69471c pbrook
    int8_t tmp; \
666 ad69471c pbrook
    tmp = (int8_t)src2; \
667 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
668 ad69471c pbrook
        if (src1) { \
669 ad69471c pbrook
            SET_QC(); \
670 ad69471c pbrook
            dest = ~0; \
671 ad69471c pbrook
        } else { \
672 ad69471c pbrook
            dest = 0; \
673 ad69471c pbrook
        } \
674 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
675 ad69471c pbrook
        dest = 0; \
676 ad69471c pbrook
    } else if (tmp < 0) { \
677 ad69471c pbrook
        dest = src1 >> -tmp; \
678 ad69471c pbrook
    } else { \
679 ad69471c pbrook
        dest = src1 << tmp; \
680 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
681 ad69471c pbrook
            SET_QC(); \
682 ad69471c pbrook
            dest = ~0; \
683 ad69471c pbrook
        } \
684 ad69471c pbrook
    }} while (0)
685 ad69471c pbrook
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
686 ad69471c pbrook
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
687 ad69471c pbrook
NEON_VOP_ENV(qshl_u32, neon_u32, 1)
688 ad69471c pbrook
#undef NEON_FN
689 ad69471c pbrook
690 ad69471c pbrook
/*
 * 64-bit unsigned saturating shift.
 * The low byte of shiftop is the signed shift count (negative means
 * shift right).  Returns all-ones and invokes SET_QC() when a left
 * shift would discard set bits.
 */
uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    const int8_t shift = (int8_t)shiftop;
    uint64_t shifted;

    if (shift >= 64) {
        /* Whole value shifted out: saturate unless it was already 0.  */
        if (val == 0) {
            return val;
        }
        SET_QC();
        return ~(uint64_t)0;
    }
    if (shift <= -64) {
        return 0;
    }
    if (shift < 0) {
        return val >> -shift;
    }

    shifted = val << shift;
    if ((shifted >> shift) == val) {
        return shifted;
    }
    SET_QC();
    return ~(uint64_t)0;
}
712 ad69471c pbrook
713 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
714 ad69471c pbrook
    int8_t tmp; \
715 ad69471c pbrook
    tmp = (int8_t)src2; \
716 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
717 a5d88f3e Peter Maydell
        if (src1) { \
718 ad69471c pbrook
            SET_QC(); \
719 a5d88f3e Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
720 a5d88f3e Peter Maydell
            if (src1 > 0) { \
721 a5d88f3e Peter Maydell
                dest--; \
722 a5d88f3e Peter Maydell
            } \
723 a5d88f3e Peter Maydell
        } else { \
724 a5d88f3e Peter Maydell
            dest = src1; \
725 a5d88f3e Peter Maydell
        } \
726 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
727 ad69471c pbrook
        dest = src1 >> 31; \
728 ad69471c pbrook
    } else if (tmp < 0) { \
729 ad69471c pbrook
        dest = src1 >> -tmp; \
730 ad69471c pbrook
    } else { \
731 ad69471c pbrook
        dest = src1 << tmp; \
732 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
733 ad69471c pbrook
            SET_QC(); \
734 a5d88f3e Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
735 a5d88f3e Peter Maydell
            if (src1 > 0) { \
736 a5d88f3e Peter Maydell
                dest--; \
737 a5d88f3e Peter Maydell
            } \
738 ad69471c pbrook
        } \
739 ad69471c pbrook
    }} while (0)
740 ad69471c pbrook
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
741 ad69471c pbrook
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
742 ad69471c pbrook
NEON_VOP_ENV(qshl_s32, neon_s32, 1)
743 ad69471c pbrook
#undef NEON_FN
744 ad69471c pbrook
745 ad69471c pbrook
/*
 * 64-bit signed saturating shift.
 * The low byte of shiftop is the signed shift count (negative means
 * arithmetic shift right).  On left-shift overflow the result saturates
 * towards the sign of the input and SET_QC() is invoked.
 */
uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    const int8_t shift = (uint8_t)shiftop;
    const int64_t input = valop;
    int64_t out;

    if (shift >= 64) {
        if (input == 0) {
            return input;
        }
        SET_QC();
        /* Sign mask XOR 0x7fff...: INT64_MAX for positive, INT64_MIN
         * for negative inputs.  */
        out = (input >> 63) ^ ~SIGNBIT64;
        return out;
    }
    if (shift <= -64) {
        out = input >> 63;
        return out;
    }
    if (shift < 0) {
        out = input >> -shift;
        return out;
    }

    out = input << shift;
    if ((out >> shift) != input) {
        SET_QC();
        out = (input >> 63) ^ ~SIGNBIT64;
    }
    return out;
}
768 ad69471c pbrook
769 4ca4502c Juha Riihimรคki
#define NEON_FN(dest, src1, src2) do { \
770 4ca4502c Juha Riihimรคki
    if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \
771 4ca4502c Juha Riihimรคki
        SET_QC(); \
772 4ca4502c Juha Riihimรคki
        dest = 0; \
773 4ca4502c Juha Riihimรคki
    } else { \
774 4ca4502c Juha Riihimรคki
        int8_t tmp; \
775 4ca4502c Juha Riihimรคki
        tmp = (int8_t)src2; \
776 4ca4502c Juha Riihimรคki
        if (tmp >= (ssize_t)sizeof(src1) * 8) { \
777 4ca4502c Juha Riihimรคki
            if (src1) { \
778 4ca4502c Juha Riihimรคki
                SET_QC(); \
779 4ca4502c Juha Riihimรคki
                dest = ~0; \
780 4ca4502c Juha Riihimรคki
            } else { \
781 4ca4502c Juha Riihimรคki
                dest = 0; \
782 4ca4502c Juha Riihimรคki
            } \
783 4ca4502c Juha Riihimรคki
        } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
784 4ca4502c Juha Riihimรคki
            dest = 0; \
785 4ca4502c Juha Riihimรคki
        } else if (tmp < 0) { \
786 4ca4502c Juha Riihimรคki
            dest = src1 >> -tmp; \
787 4ca4502c Juha Riihimรคki
        } else { \
788 4ca4502c Juha Riihimรคki
            dest = src1 << tmp; \
789 4ca4502c Juha Riihimรคki
            if ((dest >> tmp) != src1) { \
790 4ca4502c Juha Riihimรคki
                SET_QC(); \
791 4ca4502c Juha Riihimรคki
                dest = ~0; \
792 4ca4502c Juha Riihimรคki
            } \
793 4ca4502c Juha Riihimรคki
        } \
794 4ca4502c Juha Riihimรคki
    }} while (0)
795 4ca4502c Juha Riihimรคki
NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
796 4ca4502c Juha Riihimรคki
NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
797 4ca4502c Juha Riihimรคki
#undef NEON_FN
798 4ca4502c Juha Riihimรคki
799 4ca4502c Juha Riihimรคki
/*
 * 32-bit saturating shift of a signed input to an unsigned result.
 * A negative input yields 0 after SET_QC(); any other input is passed
 * unchanged to helper_neon_qshl_u32().
 */
uint32_t HELPER(neon_qshlu_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
{
    if ((int32_t)valop >= 0) {
        return helper_neon_qshl_u32(env, valop, shiftop);
    }
    SET_QC();
    return 0;
}
807 4ca4502c Juha Riihimรคki
808 4ca4502c Juha Riihimรคki
/*
 * 64-bit saturating shift of a signed input to an unsigned result.
 * A negative input yields 0 after SET_QC(); any other input is passed
 * unchanged to helper_neon_qshl_u64().
 */
uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    if ((int64_t)valop >= 0) {
        return helper_neon_qshl_u64(env, valop, shiftop);
    }
    SET_QC();
    return 0;
}
816 ad69471c pbrook
817 ad69471c pbrook
/* FIXME: This is wrong.  */
818 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
819 ad69471c pbrook
    int8_t tmp; \
820 ad69471c pbrook
    tmp = (int8_t)src2; \
821 33ebc293 Peter Maydell
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
822 33ebc293 Peter Maydell
        if (src1) { \
823 33ebc293 Peter Maydell
            SET_QC(); \
824 33ebc293 Peter Maydell
            dest = ~0; \
825 33ebc293 Peter Maydell
        } else { \
826 33ebc293 Peter Maydell
            dest = 0; \
827 33ebc293 Peter Maydell
        } \
828 33ebc293 Peter Maydell
    } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
829 33ebc293 Peter Maydell
        dest = 0; \
830 33ebc293 Peter Maydell
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
831 33ebc293 Peter Maydell
        dest = src1 >> (sizeof(src1) * 8 - 1); \
832 33ebc293 Peter Maydell
    } else if (tmp < 0) { \
833 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
834 ad69471c pbrook
    } else { \
835 ad69471c pbrook
        dest = src1 << tmp; \
836 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
837 ad69471c pbrook
            SET_QC(); \
838 ad69471c pbrook
            dest = ~0; \
839 ad69471c pbrook
        } \
840 ad69471c pbrook
    }} while (0)
841 ad69471c pbrook
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
842 ad69471c pbrook
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
843 ad69471c pbrook
#undef NEON_FN
844 ad69471c pbrook
845 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
846 4bd4ee07 Christophe Lyon
 * intermediate 64-bit accumulator.  */
847 4bd4ee07 Christophe Lyon
/*
 * 32-bit unsigned saturating rounding shift.
 * The low byte of shiftop is the signed shift count.  Left shifts
 * saturate to all-ones and invoke SET_QC() when set bits are lost.
 * Right shifts round by adding half of the discarded range, in 64-bit
 * arithmetic so the addition cannot wrap.
 */
uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
{
    const int8_t shift = (int8_t)shiftop;
    uint32_t out;

    if (shift >= 32) {
        if (val == 0) {
            return 0;
        }
        SET_QC();
        return ~0;
    }
    if (shift < -32) {
        return 0;
    }
    if (shift == -32) {
        /* Only the rounding carry (the former top bit) remains.  */
        return val >> 31;
    }
    if (shift < 0) {
        const uint64_t rounded = (uint64_t)val + (1 << (-1 - shift));
        out = rounded >> -shift;
        return out;
    }

    out = val << shift;
    if ((out >> shift) != val) {
        SET_QC();
        out = ~0;
    }
    return out;
}
874 4bd4ee07 Christophe Lyon
875 4bd4ee07 Christophe Lyon
/* Handling addition overflow with 64-bit input values is more
876 4bd4ee07 Christophe Lyon
 * tricky than with 32-bit values.  */
877 ad69471c pbrook
/*
 * 64-bit unsigned saturating rounding shift.
 * The low byte of shiftop is the signed shift count.  Left shifts
 * saturate to all-ones and invoke SET_QC() when set bits are lost.
 * Right shifts round to nearest; no wider type is available, so the
 * rounding add is done after shifting by one bit less than requested.
 */
uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    int8_t shift = (int8_t)shiftop;
    if (shift >= 64) {
        if (val) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    } else if (shift < -64) {
        val = 0;
    } else if (shift == -64) {
        /* Only the rounding carry (the former top bit) remains.  */
        val >>= 63;
    } else if (shift < 0) {
        /* Leave one extra low bit in place to act as the rounding bit.  */
        val >>= (-shift - 1);
        if (val == UINT64_MAX) {
            /* In this case, it means that the rounding constant is 1,
             * and the addition would overflow. Return the actual
             * result directly.  */
            val = 0x8000000000000000ULL;
        } else {
            val++;
            val >>= 1;
        }
    } else {
        uint64_t tmp = val;
        val <<= shift;
        /* Shifting back must recover the input, else bits were lost.  */
        if ((val >> shift) != tmp) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    }
    return val;
}
910 ad69471c pbrook
911 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
912 ad69471c pbrook
    int8_t tmp; \
913 ad69471c pbrook
    tmp = (int8_t)src2; \
914 7b6ecf5b Peter Maydell
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
915 7b6ecf5b Peter Maydell
        if (src1) { \
916 7b6ecf5b Peter Maydell
            SET_QC(); \
917 7b6ecf5b Peter Maydell
            dest = (1 << (sizeof(src1) * 8 - 1)); \
918 7b6ecf5b Peter Maydell
            if (src1 > 0) { \
919 7b6ecf5b Peter Maydell
                dest--; \
920 7b6ecf5b Peter Maydell
            } \
921 7b6ecf5b Peter Maydell
        } else { \
922 7b6ecf5b Peter Maydell
            dest = 0; \
923 7b6ecf5b Peter Maydell
        } \
924 7b6ecf5b Peter Maydell
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
925 7b6ecf5b Peter Maydell
        dest = 0; \
926 7b6ecf5b Peter Maydell
    } else if (tmp < 0) { \
927 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
928 ad69471c pbrook
    } else { \
929 ad69471c pbrook
        dest = src1 << tmp; \
930 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
931 ad69471c pbrook
            SET_QC(); \
932 960e623b Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
933 960e623b Peter Maydell
            if (src1 > 0) { \
934 960e623b Peter Maydell
                dest--; \
935 960e623b Peter Maydell
            } \
936 ad69471c pbrook
        } \
937 ad69471c pbrook
    }} while (0)
938 ad69471c pbrook
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
939 ad69471c pbrook
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
940 ad69471c pbrook
#undef NEON_FN
941 ad69471c pbrook
942 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
943 4bd4ee07 Christophe Lyon
 * intermediate 64-bit accumulator.  */
944 4bd4ee07 Christophe Lyon
/*
 * 32-bit signed saturating rounding shift.
 * The low byte of shiftop is the signed shift count.  Left shifts that
 * overflow saturate towards the sign of the input and invoke SET_QC().
 * Right shifts round by adding half of the discarded range, in 64-bit
 * arithmetic so the addition cannot overflow.
 */
uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
{
    const int32_t val = (int32_t)valop;
    const int8_t shift = (int8_t)shiftop;
    int32_t out;

    if (shift >= 32) {
        if (val == 0) {
            return 0;
        }
        SET_QC();
        /* Sign mask XOR 0x7fffffff: INT32_MAX or INT32_MIN.  */
        out = (val >> 31) ^ ~SIGNBIT;
        return out;
    }
    if (shift <= -32) {
        return 0;
    }
    if (shift < 0) {
        const int64_t rounded = ((int64_t)val + (1 << (-1 - shift)));
        out = rounded >> -shift;
        return out;
    }

    out = val << shift;
    if ((out >> shift) != val) {
        SET_QC();
        out = (val >> 31) ^ ~SIGNBIT;
    }
    return out;
}
970 4bd4ee07 Christophe Lyon
971 4bd4ee07 Christophe Lyon
/* Handling addition overflow with 64-bit input values is more
972 4bd4ee07 Christophe Lyon
 * tricky than with 32-bit values.  */
973 ad69471c pbrook
/*
 * 64-bit signed saturating rounding shift.
 * The low byte of shiftop is the signed shift count.  Left shifts that
 * overflow saturate towards the sign of the input and invoke SET_QC().
 * Right shifts round to nearest; the rounding add is done after
 * shifting by one bit less than requested.
 */
uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    const int8_t shift = (uint8_t)shiftop;
    int64_t val = valop;
    int64_t before;

    if (shift >= 64) {
        if (val == 0) {
            return val;
        }
        SET_QC();
        val = (val >> 63) ^ ~SIGNBIT64;
        return val;
    }
    if (shift <= -64) {
        return 0;
    }
    if (shift < 0) {
        /* Leave one extra low bit in place to act as the rounding bit.  */
        val >>= (-shift - 1);
        if (val != INT64_MAX) {
            val++;
            val >>= 1;
        } else {
            /* Adding the rounding constant (1) would overflow, so the
             * known result is returned directly.  */
            val = 0x4000000000000000ULL;
        }
        return val;
    }

    before = val;
    val <<= shift;
    if ((val >> shift) != before) {
        SET_QC();
        val = (before >> 63) ^ ~SIGNBIT64;
    }
    return val;
}
1006 ad69471c pbrook
1007 ad69471c pbrook
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
1008 ad69471c pbrook
{
1009 ad69471c pbrook
    uint32_t mask;
1010 ad69471c pbrook
    mask = (a ^ b) & 0x80808080u;
1011 ad69471c pbrook
    a &= ~0x80808080u;
1012 ad69471c pbrook
    b &= ~0x80808080u;
1013 ad69471c pbrook
    return (a + b) ^ mask;
1014 ad69471c pbrook
}
1015 ad69471c pbrook
1016 ad69471c pbrook
uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
1017 ad69471c pbrook
{
1018 ad69471c pbrook
    uint32_t mask;
1019 ad69471c pbrook
    mask = (a ^ b) & 0x80008000u;
1020 ad69471c pbrook
    a &= ~0x80008000u;
1021 ad69471c pbrook
    b &= ~0x80008000u;
1022 ad69471c pbrook
    return (a + b) ^ mask;
1023 ad69471c pbrook
}
1024 ad69471c pbrook
1025 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = src1 + src2
1026 ad69471c pbrook
NEON_POP(padd_u8, neon_u8, 4)
1027 ad69471c pbrook
NEON_POP(padd_u16, neon_u16, 2)
1028 ad69471c pbrook
#undef NEON_FN
1029 ad69471c pbrook
1030 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = src1 - src2
1031 ad69471c pbrook
NEON_VOP(sub_u8, neon_u8, 4)
1032 ad69471c pbrook
NEON_VOP(sub_u16, neon_u16, 2)
1033 ad69471c pbrook
#undef NEON_FN
1034 ad69471c pbrook
1035 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = src1 * src2
1036 ad69471c pbrook
NEON_VOP(mul_u8, neon_u8, 4)
1037 ad69471c pbrook
NEON_VOP(mul_u16, neon_u16, 2)
1038 ad69471c pbrook
#undef NEON_FN
1039 ad69471c pbrook
1040 1654b2d6 aurel32
/* Polynomial multiplication is like integer multiplication except the
1041 ad69471c pbrook
   partial products are XORed, not added.  */
1042 ad69471c pbrook
uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
1043 ad69471c pbrook
{
1044 ad69471c pbrook
    uint32_t mask;
1045 ad69471c pbrook
    uint32_t result;
1046 ad69471c pbrook
    result = 0;
1047 ad69471c pbrook
    while (op1) {
1048 ad69471c pbrook
        mask = 0;
1049 ad69471c pbrook
        if (op1 & 1)
1050 ad69471c pbrook
            mask |= 0xff;
1051 ad69471c pbrook
        if (op1 & (1 << 8))
1052 ad69471c pbrook
            mask |= (0xff << 8);
1053 ad69471c pbrook
        if (op1 & (1 << 16))
1054 ad69471c pbrook
            mask |= (0xff << 16);
1055 ad69471c pbrook
        if (op1 & (1 << 24))
1056 ad69471c pbrook
            mask |= (0xff << 24);
1057 ad69471c pbrook
        result ^= op2 & mask;
1058 ad69471c pbrook
        op1 = (op1 >> 1) & 0x7f7f7f7f;
1059 ad69471c pbrook
        op2 = (op2 << 1) & 0xfefefefe;
1060 ad69471c pbrook
    }
1061 ad69471c pbrook
    return result;
1062 ad69471c pbrook
}
1063 ad69471c pbrook
1064 e5ca24cb Peter Maydell
uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
1065 e5ca24cb Peter Maydell
{
1066 e5ca24cb Peter Maydell
    uint64_t result = 0;
1067 e5ca24cb Peter Maydell
    uint64_t mask;
1068 e5ca24cb Peter Maydell
    uint64_t op2ex = op2;
1069 e5ca24cb Peter Maydell
    op2ex = (op2ex & 0xff) |
1070 e5ca24cb Peter Maydell
        ((op2ex & 0xff00) << 8) |
1071 e5ca24cb Peter Maydell
        ((op2ex & 0xff0000) << 16) |
1072 e5ca24cb Peter Maydell
        ((op2ex & 0xff000000) << 24);
1073 e5ca24cb Peter Maydell
    while (op1) {
1074 e5ca24cb Peter Maydell
        mask = 0;
1075 e5ca24cb Peter Maydell
        if (op1 & 1) {
1076 e5ca24cb Peter Maydell
            mask |= 0xffff;
1077 e5ca24cb Peter Maydell
        }
1078 e5ca24cb Peter Maydell
        if (op1 & (1 << 8)) {
1079 e5ca24cb Peter Maydell
            mask |= (0xffffU << 16);
1080 e5ca24cb Peter Maydell
        }
1081 e5ca24cb Peter Maydell
        if (op1 & (1 << 16)) {
1082 e5ca24cb Peter Maydell
            mask |= (0xffffULL << 32);
1083 e5ca24cb Peter Maydell
        }
1084 e5ca24cb Peter Maydell
        if (op1 & (1 << 24)) {
1085 e5ca24cb Peter Maydell
            mask |= (0xffffULL << 48);
1086 e5ca24cb Peter Maydell
        }
1087 e5ca24cb Peter Maydell
        result ^= op2ex & mask;
1088 e5ca24cb Peter Maydell
        op1 = (op1 >> 1) & 0x7f7f7f7f;
1089 e5ca24cb Peter Maydell
        op2ex <<= 1;
1090 e5ca24cb Peter Maydell
    }
1091 e5ca24cb Peter Maydell
    return result;
1092 e5ca24cb Peter Maydell
}
1093 e5ca24cb Peter Maydell
1094 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
1095 ad69471c pbrook
NEON_VOP(tst_u8, neon_u8, 4)
1096 ad69471c pbrook
NEON_VOP(tst_u16, neon_u16, 2)
1097 ad69471c pbrook
NEON_VOP(tst_u32, neon_u32, 1)
1098 ad69471c pbrook
#undef NEON_FN
1099 ad69471c pbrook
1100 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
1101 ad69471c pbrook
NEON_VOP(ceq_u8, neon_u8, 4)
1102 ad69471c pbrook
NEON_VOP(ceq_u16, neon_u16, 2)
1103 ad69471c pbrook
NEON_VOP(ceq_u32, neon_u32, 1)
1104 ad69471c pbrook
#undef NEON_FN
1105 ad69471c pbrook
1106 ad69471c pbrook
#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
1107 ad69471c pbrook
NEON_VOP1(abs_s8, neon_s8, 4)
1108 ad69471c pbrook
NEON_VOP1(abs_s16, neon_s16, 2)
1109 ad69471c pbrook
#undef NEON_FN
1110 ad69471c pbrook
1111 ad69471c pbrook
/* Count Leading Sign/Zero Bits.  */
1112 ad69471c pbrook
/* Return the number of leading zero bits in an 8-bit value (8 for 0).  */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    /* Each remaining significant bit removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1119 ad69471c pbrook
1120 ad69471c pbrook
/* Return the number of leading zero bits in a 16-bit value (16 for 0).  */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    /* Each remaining significant bit removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1127 ad69471c pbrook
1128 ad69471c pbrook
#define NEON_FN(dest, src, dummy) dest = do_clz8(src)
1129 ad69471c pbrook
NEON_VOP1(clz_u8, neon_u8, 4)
1130 ad69471c pbrook
#undef NEON_FN
1131 ad69471c pbrook
1132 ad69471c pbrook
#define NEON_FN(dest, src, dummy) dest = do_clz16(src)
1133 ad69471c pbrook
NEON_VOP1(clz_u16, neon_u16, 2)
1134 ad69471c pbrook
#undef NEON_FN
1135 ad69471c pbrook
1136 ad69471c pbrook
#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1
1137 ad69471c pbrook
NEON_VOP1(cls_s8, neon_s8, 4)
1138 ad69471c pbrook
#undef NEON_FN
1139 ad69471c pbrook
1140 ad69471c pbrook
#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1
1141 ad69471c pbrook
NEON_VOP1(cls_s16, neon_s16, 2)
1142 ad69471c pbrook
#undef NEON_FN
1143 ad69471c pbrook
1144 ad69471c pbrook
uint32_t HELPER(neon_cls_s32)(uint32_t x)
1145 ad69471c pbrook
{
1146 ad69471c pbrook
    int count;
1147 ad69471c pbrook
    if ((int32_t)x < 0)
1148 ad69471c pbrook
        x = ~x;
1149 ad69471c pbrook
    for (count = 32; x; count--)
1150 ad69471c pbrook
        x = x >> 1;
1151 ad69471c pbrook
    return count - 1;
1152 ad69471c pbrook
}
1153 ad69471c pbrook
1154 ad69471c pbrook
/* Bit count.  */
1155 ad69471c pbrook
uint32_t HELPER(neon_cnt_u8)(uint32_t x)
1156 ad69471c pbrook
{
1157 ad69471c pbrook
    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
1158 ad69471c pbrook
    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
1159 ad69471c pbrook
    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
1160 ad69471c pbrook
    return x;
1161 ad69471c pbrook
}
1162 ad69471c pbrook
1163 ad69471c pbrook
#define NEON_QDMULH16(dest, src1, src2, round) do { \
1164 ad69471c pbrook
    uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
1165 ad69471c pbrook
    if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
1166 ad69471c pbrook
        SET_QC(); \
1167 ad69471c pbrook
        tmp = (tmp >> 31) ^ ~SIGNBIT; \
1168 46eece9d Juha Riihimรคki
    } else { \
1169 46eece9d Juha Riihimรคki
        tmp <<= 1; \
1170 ad69471c pbrook
    } \
1171 ad69471c pbrook
    if (round) { \
1172 ad69471c pbrook
        int32_t old = tmp; \
1173 ad69471c pbrook
        tmp += 1 << 15; \
1174 ad69471c pbrook
        if ((int32_t)tmp < old) { \
1175 ad69471c pbrook
            SET_QC(); \
1176 ad69471c pbrook
            tmp = SIGNBIT - 1; \
1177 ad69471c pbrook
        } \
1178 ad69471c pbrook
    } \
1179 ad69471c pbrook
    dest = tmp >> 16; \
1180 ad69471c pbrook
    } while(0)
1181 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
1182 ad69471c pbrook
NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
1183 ad69471c pbrook
#undef NEON_FN
1184 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
1185 ad69471c pbrook
NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
1186 ad69471c pbrook
#undef NEON_FN
1187 ad69471c pbrook
#undef NEON_QDMULH16
1188 ad69471c pbrook
1189 ad69471c pbrook
#define NEON_QDMULH32(dest, src1, src2, round) do { \
1190 ad69471c pbrook
    uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \
1191 ad69471c pbrook
    if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \
1192 ad69471c pbrook
        SET_QC(); \
1193 ad69471c pbrook
        tmp = (tmp >> 63) ^ ~SIGNBIT64; \
1194 ad69471c pbrook
    } else { \
1195 ad69471c pbrook
        tmp <<= 1; \
1196 ad69471c pbrook
    } \
1197 ad69471c pbrook
    if (round) { \
1198 ad69471c pbrook
        int64_t old = tmp; \
1199 ad69471c pbrook
        tmp += (int64_t)1 << 31; \
1200 ad69471c pbrook
        if ((int64_t)tmp < old) { \
1201 ad69471c pbrook
            SET_QC(); \
1202 ad69471c pbrook
            tmp = SIGNBIT64 - 1; \
1203 ad69471c pbrook
        } \
1204 ad69471c pbrook
    } \
1205 ad69471c pbrook
    dest = tmp >> 32; \
1206 ad69471c pbrook
    } while(0)
1207 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
1208 ad69471c pbrook
NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
1209 ad69471c pbrook
#undef NEON_FN
1210 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
1211 ad69471c pbrook
NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
1212 ad69471c pbrook
#undef NEON_FN
1213 ad69471c pbrook
#undef NEON_QDMULH32
1214 ad69471c pbrook
1215 ad69471c pbrook
uint32_t HELPER(neon_narrow_u8)(uint64_t x)
1216 ad69471c pbrook
{
1217 ad69471c pbrook
    return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
1218 ad69471c pbrook
           | ((x >> 24) & 0xff000000u);
1219 ad69471c pbrook
}
1220 ad69471c pbrook
1221 ad69471c pbrook
uint32_t HELPER(neon_narrow_u16)(uint64_t x)
1222 ad69471c pbrook
{
1223 ad69471c pbrook
    return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
1224 ad69471c pbrook
}
1225 ad69471c pbrook
1226 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u8)(uint64_t x)
1227 ad69471c pbrook
{
1228 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
1229 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
1230 ad69471c pbrook
}
1231 ad69471c pbrook
1232 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u16)(uint64_t x)
1233 ad69471c pbrook
{
1234 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
1235 ad69471c pbrook
}
1236 ad69471c pbrook
1237 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
1238 ad69471c pbrook
{
1239 ad69471c pbrook
    x &= 0xff80ff80ff80ff80ull;
1240 ad69471c pbrook
    x += 0x0080008000800080ull;
1241 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
1242 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
1243 ad69471c pbrook
}
1244 ad69471c pbrook
1245 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
1246 ad69471c pbrook
{
1247 ad69471c pbrook
    x &= 0xffff8000ffff8000ull;
1248 ad69471c pbrook
    x += 0x0000800000008000ull;
1249 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
1250 ad69471c pbrook
}
1251 ad69471c pbrook
1252 af1bbf30 Juha Riihimรคki
/*
 * Narrow four signed 16-bit lanes of x to unsigned bytes with
 * saturation.  Negative lanes become 0 and lanes above 0xff become
 * 0xff; SET_QC() is invoked whenever a lane is clamped.
 */
uint32_t HELPER(neon_unarrow_sat8)(CPUState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const uint16_t lane = x >> bit;
        uint8_t clamped;

        if (lane & 0x8000) {
            /* Negative input: the byte stays 0 in the result.  */
            SET_QC();
            continue;
        }
        if (lane > 0xff) {
            clamped = 0xff;
            SET_QC();
        } else {
            clamped = lane;
        }
        packed |= (uint32_t)clamped << (bit / 2);
    }
    return packed;
}
1278 af1bbf30 Juha Riihimรคki
1279 ad69471c pbrook
/*
 * Narrow four unsigned 16-bit lanes of x to bytes with saturation.
 * Lanes above 0xff become 0xff and SET_QC() is invoked for them.
 */
uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const uint16_t lane = x >> bit;
        uint8_t clamped;

        if (lane > 0xff) {
            clamped = 0xff;
            SET_QC();
        } else {
            clamped = lane;
        }
        packed |= (uint32_t)clamped << (bit / 2);
    }
    return packed;
}
1301 ad69471c pbrook
1302 ad69471c pbrook
/*
 * Narrow four signed 16-bit lanes of x to signed bytes with
 * saturation.  Lanes that do not fit in 8 bits are clamped to 0x7f or
 * 0x80 according to their sign, and SET_QC() is invoked for them.
 */
uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const int16_t lane = x >> bit;
        uint8_t clamped;

        if (lane != (int8_t)lane) {
            /* Sign mask XOR 0x7f: 0x7f for positive, 0x80 for negative.  */
            clamped = (lane >> 15) ^ 0x7f;
            SET_QC();
        } else {
            clamped = lane;
        }
        packed |= (uint32_t)clamped << (bit / 2);
    }
    return packed;
}
1324 ad69471c pbrook
1325 af1bbf30 Juha Riihimรคki
/*
 * Narrow the two signed 32-bit halves of x to unsigned 16-bit values.
 * A half with bit 31 set becomes 0; a half above 0xffff becomes 0xffff.
 * Either clamp invokes SET_QC().  The low half lands in bits [15:0] and
 * the high half in bits [31:16] of the result.
 */
uint32_t HELPER(neon_unarrow_sat16)(CPUState *env, uint64_t x)
{
    uint32_t half[2];
    int i;

    half[0] = x;
    half[1] = x >> 32;
    for (i = 0; i < 2; i++) {
        if (half[i] & 0x80000000) {
            half[i] = 0;
            SET_QC();
        } else if (half[i] > 0xffff) {
            half[i] = 0xffff;
            SET_QC();
        }
    }
    return half[0] | (half[1] << 16);
}
1347 af1bbf30 Juha Riihimรคki
1348 ad69471c pbrook
/*
 * Narrow the two unsigned 32-bit halves of x to 16 bits each.
 * A half above 0xffff is clamped to 0xffff and SET_QC() is invoked.
 * The low half lands in bits [15:0], the high half in bits [31:16].
 */
uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
{
    uint32_t half[2];
    int i;

    half[0] = x;
    half[1] = x >> 32;
    for (i = 0; i < 2; i++) {
        if (half[i] > 0xffff) {
            half[i] = 0xffff;
            SET_QC();
        }
    }
    return half[0] | (half[1] << 16);
}
1364 ad69471c pbrook
1365 ad69471c pbrook
/*
 * Narrow the two signed 32-bit halves of x to signed 16-bit values.
 * A half that does not survive a round trip through int16_t is clamped
 * (negative -> 0x8000, positive -> 0x7fff) and SET_QC() is invoked.
 * The low half lands in bits [15:0], the high half in bits [31:16].
 */
uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
{
    int32_t low;
    int32_t high;

    low = x;
    if (low != (int16_t)low) {
        /* (low >> 31) is all-ones for a negative value, zero otherwise.  */
        low = (low >> 31) ^ 0x7fff;
        SET_QC();
    }
    high = x >> 32;
    if (high != (int16_t)high) {
        high = (high >> 31) ^ 0x7fff;
        SET_QC();
    }
    /*
     * 'high' may be negative here; shift it as an unsigned value because
     * left-shifting a negative signed integer is undefined behaviour.
     */
    return (uint16_t)low | ((uint32_t)high << 16);
}
1381 ad69471c pbrook
1382 af1bbf30 Juha Riihimรคki
/*
 * Narrow the signed 64-bit value x to an unsigned 32-bit value.
 * Returns 0 when bit 63 is set and 0xffffffff when x exceeds 32 bits;
 * both cases invoke SET_QC().  Otherwise x is returned unchanged.
 */
uint32_t HELPER(neon_unarrow_sat32)(CPUState *env, uint64_t x)
{
    uint32_t result = x;

    if ((int64_t)x < 0) {
        result = 0;
        SET_QC();
    } else if ((x >> 32) != 0) {
        result = 0xffffffffu;
        SET_QC();
    }
    return result;
}
1394 af1bbf30 Juha Riihimรคki
1395 ad69471c pbrook
/*
 * Narrow the unsigned 64-bit value x to 32 bits.  A value that does not
 * fit is clamped to 0xffffffff and SET_QC() is invoked.
 */
uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
{
    if ((x >> 32) == 0) {
        return x;
    }
    SET_QC();
    return 0xffffffffu;
}
1403 ad69471c pbrook
1404 ad69471c pbrook
/*
 * Narrow the signed 64-bit value x to a signed 32-bit value.
 * A value that does not survive a round trip through int32_t is clamped
 * to 0x80000000 (negative) or 0x7fffffff (positive) and SET_QC() is
 * invoked.
 */
uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x)
{
    int64_t wide = (int64_t)x;

    if (wide == (int32_t)x) {
        return x;
    }
    SET_QC();
    return (wide < 0) ? 0x80000000u : 0x7fffffffu;
}
1412 ad69471c pbrook
1413 ad69471c pbrook
uint64_t HELPER(neon_widen_u8)(uint32_t x)
1414 ad69471c pbrook
{
1415 ad69471c pbrook
    uint64_t tmp;
1416 ad69471c pbrook
    uint64_t ret;
1417 ad69471c pbrook
    ret = (uint8_t)x;
1418 ad69471c pbrook
    tmp = (uint8_t)(x >> 8);
1419 ad69471c pbrook
    ret |= tmp << 16;
1420 ad69471c pbrook
    tmp = (uint8_t)(x >> 16);
1421 ad69471c pbrook
    ret |= tmp << 32;
1422 ad69471c pbrook
    tmp = (uint8_t)(x >> 24);
1423 ad69471c pbrook
    ret |= tmp << 48;
1424 ad69471c pbrook
    return ret;
1425 ad69471c pbrook
}
1426 ad69471c pbrook
1427 ad69471c pbrook
uint64_t HELPER(neon_widen_s8)(uint32_t x)
1428 ad69471c pbrook
{
1429 ad69471c pbrook
    uint64_t tmp;
1430 ad69471c pbrook
    uint64_t ret;
1431 ad69471c pbrook
    ret = (uint16_t)(int8_t)x;
1432 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 8);
1433 ad69471c pbrook
    ret |= tmp << 16;
1434 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 16);
1435 ad69471c pbrook
    ret |= tmp << 32;
1436 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 24);
1437 ad69471c pbrook
    ret |= tmp << 48;
1438 ad69471c pbrook
    return ret;
1439 ad69471c pbrook
}
1440 ad69471c pbrook
1441 ad69471c pbrook
uint64_t HELPER(neon_widen_u16)(uint32_t x)
1442 ad69471c pbrook
{
1443 ad69471c pbrook
    uint64_t high = (uint16_t)(x >> 16);
1444 ad69471c pbrook
    return ((uint16_t)x) | (high << 32);
1445 ad69471c pbrook
}
1446 ad69471c pbrook
1447 ad69471c pbrook
uint64_t HELPER(neon_widen_s16)(uint32_t x)
1448 ad69471c pbrook
{
1449 ad69471c pbrook
    uint64_t high = (int16_t)(x >> 16);
1450 ad69471c pbrook
    return ((uint32_t)(int16_t)x) | (high << 32);
1451 ad69471c pbrook
}
1452 ad69471c pbrook
1453 ad69471c pbrook
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
1454 ad69471c pbrook
{
1455 ad69471c pbrook
    uint64_t mask;
1456 ad69471c pbrook
    mask = (a ^ b) & 0x8000800080008000ull;
1457 ad69471c pbrook
    a &= ~0x8000800080008000ull;
1458 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1459 ad69471c pbrook
    return (a + b) ^ mask;
1460 ad69471c pbrook
}
1461 ad69471c pbrook
1462 ad69471c pbrook
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
1463 ad69471c pbrook
{
1464 ad69471c pbrook
    uint64_t mask;
1465 ad69471c pbrook
    mask = (a ^ b) & 0x8000000080000000ull;
1466 ad69471c pbrook
    a &= ~0x8000000080000000ull;
1467 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1468 ad69471c pbrook
    return (a + b) ^ mask;
1469 ad69471c pbrook
}
1470 ad69471c pbrook
1471 ad69471c pbrook
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
1472 ad69471c pbrook
{
1473 ad69471c pbrook
    uint64_t tmp;
1474 ad69471c pbrook
    uint64_t tmp2;
1475 ad69471c pbrook
1476 ad69471c pbrook
    tmp = a & 0x0000ffff0000ffffull;
1477 ad69471c pbrook
    tmp += (a >> 16) & 0x0000ffff0000ffffull;
1478 ad69471c pbrook
    tmp2 = b & 0xffff0000ffff0000ull;
1479 ad69471c pbrook
    tmp2 += (b << 16) & 0xffff0000ffff0000ull;
1480 ad69471c pbrook
    return    ( tmp         & 0xffff)
1481 ad69471c pbrook
            | ((tmp  >> 16) & 0xffff0000ull)
1482 ad69471c pbrook
            | ((tmp2 << 16) & 0xffff00000000ull)
1483 ad69471c pbrook
            | ( tmp2        & 0xffff000000000000ull);
1484 ad69471c pbrook
}
1485 ad69471c pbrook
1486 ad69471c pbrook
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
1487 ad69471c pbrook
{
1488 ad69471c pbrook
    uint32_t low = a + (a >> 32);
1489 ad69471c pbrook
    uint32_t high = b + (b >> 32);
1490 ad69471c pbrook
    return low + ((uint64_t)high << 32);
1491 ad69471c pbrook
}
1492 ad69471c pbrook
1493 ad69471c pbrook
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
1494 ad69471c pbrook
{
1495 ad69471c pbrook
    uint64_t mask;
1496 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000800080008000ull;
1497 ad69471c pbrook
    a |= 0x8000800080008000ull;
1498 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1499 ad69471c pbrook
    return (a - b) ^ mask;
1500 ad69471c pbrook
}
1501 ad69471c pbrook
1502 ad69471c pbrook
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
1503 ad69471c pbrook
{
1504 ad69471c pbrook
    uint64_t mask;
1505 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000000080000000ull;
1506 ad69471c pbrook
    a |= 0x8000000080000000ull;
1507 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1508 ad69471c pbrook
    return (a - b) ^ mask;
1509 ad69471c pbrook
}
1510 ad69471c pbrook
1511 ad69471c pbrook
/*
 * Saturating signed add of a and b treated as two 32-bit lanes.
 * Overflow is detected when both inputs share a sign that differs from
 * the sign of the sum; the lane is then replaced by 0x7fffffff (positive
 * inputs) or 0x80000000 (negative inputs) and SET_QC() is invoked.
 */
uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
{
    uint32_t sum[2];
    int i;

    for (i = 0; i < 2; i++) {
        uint32_t x = a >> (32 * i);
        uint32_t y = b >> (32 * i);

        sum[i] = x + y;
        if (((sum[i] ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
            SET_QC();
            sum[i] = ((int32_t)x >> 31) ^ ~SIGNBIT;
        }
    }
    return sum[0] | ((uint64_t)sum[1] << 32);
}
1532 ad69471c pbrook
1533 ad69471c pbrook
/*
 * Saturating signed 64-bit add.  When a and b share a sign that differs
 * from the sign of the sum, SET_QC() is invoked and the result is the
 * largest positive value (positive inputs) or the most negative value
 * (negative inputs).
 */
uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;

    /* No overflow if the inputs differ in sign or the sum keeps a's sign.  */
    if (((a ^ b) & SIGNBIT64) || !((sum ^ a) & SIGNBIT64)) {
        return sum;
    }
    SET_QC();
    return ((int64_t)a >> 63) ^ ~SIGNBIT64;
}
1544 ad69471c pbrook
1545 ad69471c pbrook
#define DO_ABD(dest, x, y, type) do { \
1546 ad69471c pbrook
    type tmp_x = x; \
1547 ad69471c pbrook
    type tmp_y = y; \
1548 ad69471c pbrook
    dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
1549 ad69471c pbrook
    } while(0)
1550 ad69471c pbrook
1551 ad69471c pbrook
uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
1552 ad69471c pbrook
{
1553 ad69471c pbrook
    uint64_t tmp;
1554 ad69471c pbrook
    uint64_t result;
1555 ad69471c pbrook
    DO_ABD(result, a, b, uint8_t);
1556 ad69471c pbrook
    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
1557 ad69471c pbrook
    result |= tmp << 16;
1558 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
1559 ad69471c pbrook
    result |= tmp << 32;
1560 ad69471c pbrook
    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
1561 ad69471c pbrook
    result |= tmp << 48;
1562 ad69471c pbrook
    return result;
1563 ad69471c pbrook
}
1564 ad69471c pbrook
1565 ad69471c pbrook
uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
1566 ad69471c pbrook
{
1567 ad69471c pbrook
    uint64_t tmp;
1568 ad69471c pbrook
    uint64_t result;
1569 ad69471c pbrook
    DO_ABD(result, a, b, int8_t);
1570 ad69471c pbrook
    DO_ABD(tmp, a >> 8, b >> 8, int8_t);
1571 ad69471c pbrook
    result |= tmp << 16;
1572 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, int8_t);
1573 ad69471c pbrook
    result |= tmp << 32;
1574 ad69471c pbrook
    DO_ABD(tmp, a >> 24, b >> 24, int8_t);
1575 ad69471c pbrook
    result |= tmp << 48;
1576 ad69471c pbrook
    return result;
1577 ad69471c pbrook
}
1578 ad69471c pbrook
1579 ad69471c pbrook
uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
1580 ad69471c pbrook
{
1581 ad69471c pbrook
    uint64_t tmp;
1582 ad69471c pbrook
    uint64_t result;
1583 ad69471c pbrook
    DO_ABD(result, a, b, uint16_t);
1584 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
1585 ad69471c pbrook
    return result | (tmp << 32);
1586 ad69471c pbrook
}
1587 ad69471c pbrook
1588 ad69471c pbrook
uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
1589 ad69471c pbrook
{
1590 ad69471c pbrook
    uint64_t tmp;
1591 ad69471c pbrook
    uint64_t result;
1592 ad69471c pbrook
    DO_ABD(result, a, b, int16_t);
1593 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, int16_t);
1594 ad69471c pbrook
    return result | (tmp << 32);
1595 ad69471c pbrook
}
1596 ad69471c pbrook
1597 ad69471c pbrook
uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
1598 ad69471c pbrook
{
1599 ad69471c pbrook
    uint64_t result;
1600 ad69471c pbrook
    DO_ABD(result, a, b, uint32_t);
1601 ad69471c pbrook
    return result;
1602 ad69471c pbrook
}
1603 ad69471c pbrook
1604 ad69471c pbrook
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
1605 ad69471c pbrook
{
1606 ad69471c pbrook
    uint64_t result;
1607 ad69471c pbrook
    DO_ABD(result, a, b, int32_t);
1608 ad69471c pbrook
    return result;
1609 ad69471c pbrook
}
1610 ad69471c pbrook
#undef DO_ABD
1611 ad69471c pbrook
1612 ad69471c pbrook
/* Widening multiply. Named type is the source type.  */
1613 ad69471c pbrook
#define DO_MULL(dest, x, y, type1, type2) do { \
1614 ad69471c pbrook
    type1 tmp_x = x; \
1615 ad69471c pbrook
    type1 tmp_y = y; \
1616 ad69471c pbrook
    dest = (type2)((type2)tmp_x * (type2)tmp_y); \
1617 ad69471c pbrook
    } while(0)
1618 ad69471c pbrook
1619 ad69471c pbrook
uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b)
1620 ad69471c pbrook
{
1621 ad69471c pbrook
    uint64_t tmp;
1622 ad69471c pbrook
    uint64_t result;
1623 ad69471c pbrook
1624 ad69471c pbrook
    DO_MULL(result, a, b, uint8_t, uint16_t);
1625 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t);
1626 ad69471c pbrook
    result |= tmp << 16;
1627 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t);
1628 ad69471c pbrook
    result |= tmp << 32;
1629 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t);
1630 ad69471c pbrook
    result |= tmp << 48;
1631 ad69471c pbrook
    return result;
1632 ad69471c pbrook
}
1633 ad69471c pbrook
1634 ad69471c pbrook
uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b)
1635 ad69471c pbrook
{
1636 ad69471c pbrook
    uint64_t tmp;
1637 ad69471c pbrook
    uint64_t result;
1638 ad69471c pbrook
1639 ad69471c pbrook
    DO_MULL(result, a, b, int8_t, uint16_t);
1640 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t);
1641 ad69471c pbrook
    result |= tmp << 16;
1642 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t);
1643 ad69471c pbrook
    result |= tmp << 32;
1644 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t);
1645 ad69471c pbrook
    result |= tmp << 48;
1646 ad69471c pbrook
    return result;
1647 ad69471c pbrook
}
1648 ad69471c pbrook
1649 ad69471c pbrook
uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b)
1650 ad69471c pbrook
{
1651 ad69471c pbrook
    uint64_t tmp;
1652 ad69471c pbrook
    uint64_t result;
1653 ad69471c pbrook
1654 ad69471c pbrook
    DO_MULL(result, a, b, uint16_t, uint32_t);
1655 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
1656 ad69471c pbrook
    return result | (tmp << 32);
1657 ad69471c pbrook
}
1658 ad69471c pbrook
1659 ad69471c pbrook
uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b)
1660 ad69471c pbrook
{
1661 ad69471c pbrook
    uint64_t tmp;
1662 ad69471c pbrook
    uint64_t result;
1663 ad69471c pbrook
1664 ad69471c pbrook
    DO_MULL(result, a, b, int16_t, uint32_t);
1665 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t);
1666 ad69471c pbrook
    return result | (tmp << 32);
1667 ad69471c pbrook
}
1668 ad69471c pbrook
1669 ad69471c pbrook
uint64_t HELPER(neon_negl_u16)(uint64_t x)
1670 ad69471c pbrook
{
1671 ad69471c pbrook
    uint16_t tmp;
1672 ad69471c pbrook
    uint64_t result;
1673 ad69471c pbrook
    result = (uint16_t)-x;
1674 ad69471c pbrook
    tmp = -(x >> 16);
1675 ad69471c pbrook
    result |= (uint64_t)tmp << 16;
1676 ad69471c pbrook
    tmp = -(x >> 32);
1677 ad69471c pbrook
    result |= (uint64_t)tmp << 32;
1678 ad69471c pbrook
    tmp = -(x >> 48);
1679 ad69471c pbrook
    result |= (uint64_t)tmp << 48;
1680 ad69471c pbrook
    return result;
1681 ad69471c pbrook
}
1682 ad69471c pbrook
1683 ad69471c pbrook
uint64_t HELPER(neon_negl_u32)(uint64_t x)
1684 ad69471c pbrook
{
1685 ad69471c pbrook
    uint32_t low = -x;
1686 ad69471c pbrook
    uint32_t high = -(x >> 32);
1687 ad69471c pbrook
    return low | ((uint64_t)high << 32);
1688 ad69471c pbrook
}
1689 ad69471c pbrook
1690 ad69471c pbrook
/* FIXME:  There should be a native op for this.  */
1691 ad69471c pbrook
uint64_t HELPER(neon_negl_u64)(uint64_t x)
1692 ad69471c pbrook
{
1693 ad69471c pbrook
    return -x;
1694 ad69471c pbrook
}
1695 ad69471c pbrook
1696 ad69471c pbrook
/* Saturating sign manipulation.  */
1697 ad69471c pbrook
/* ??? Make these use NEON_VOP1 */
1698 ad69471c pbrook
#define DO_QABS8(x) do { \
1699 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1700 ad69471c pbrook
        x = 0x7f; \
1701 ad69471c pbrook
        SET_QC(); \
1702 ad69471c pbrook
    } else if (x < 0) { \
1703 ad69471c pbrook
        x = -x; \
1704 ad69471c pbrook
    }} while (0)
1705 ad69471c pbrook
uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x)
1706 ad69471c pbrook
{
1707 ad69471c pbrook
    neon_s8 vec;
1708 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1709 ad69471c pbrook
    DO_QABS8(vec.v1);
1710 ad69471c pbrook
    DO_QABS8(vec.v2);
1711 ad69471c pbrook
    DO_QABS8(vec.v3);
1712 ad69471c pbrook
    DO_QABS8(vec.v4);
1713 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1714 ad69471c pbrook
    return x;
1715 ad69471c pbrook
}
1716 ad69471c pbrook
#undef DO_QABS8
1717 ad69471c pbrook
1718 ad69471c pbrook
#define DO_QNEG8(x) do { \
1719 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1720 ad69471c pbrook
        x = 0x7f; \
1721 ad69471c pbrook
        SET_QC(); \
1722 ad69471c pbrook
    } else { \
1723 ad69471c pbrook
        x = -x; \
1724 ad69471c pbrook
    }} while (0)
1725 ad69471c pbrook
uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x)
1726 ad69471c pbrook
{
1727 ad69471c pbrook
    neon_s8 vec;
1728 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1729 ad69471c pbrook
    DO_QNEG8(vec.v1);
1730 ad69471c pbrook
    DO_QNEG8(vec.v2);
1731 ad69471c pbrook
    DO_QNEG8(vec.v3);
1732 ad69471c pbrook
    DO_QNEG8(vec.v4);
1733 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1734 ad69471c pbrook
    return x;
1735 ad69471c pbrook
}
1736 ad69471c pbrook
#undef DO_QNEG8
1737 ad69471c pbrook
1738 ad69471c pbrook
#define DO_QABS16(x) do { \
1739 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1740 ad69471c pbrook
        x = 0x7fff; \
1741 ad69471c pbrook
        SET_QC(); \
1742 ad69471c pbrook
    } else if (x < 0) { \
1743 ad69471c pbrook
        x = -x; \
1744 ad69471c pbrook
    }} while (0)
1745 ad69471c pbrook
uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x)
1746 ad69471c pbrook
{
1747 ad69471c pbrook
    neon_s16 vec;
1748 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1749 ad69471c pbrook
    DO_QABS16(vec.v1);
1750 ad69471c pbrook
    DO_QABS16(vec.v2);
1751 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1752 ad69471c pbrook
    return x;
1753 ad69471c pbrook
}
1754 ad69471c pbrook
#undef DO_QABS16
1755 ad69471c pbrook
1756 ad69471c pbrook
#define DO_QNEG16(x) do { \
1757 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1758 ad69471c pbrook
        x = 0x7fff; \
1759 ad69471c pbrook
        SET_QC(); \
1760 ad69471c pbrook
    } else { \
1761 ad69471c pbrook
        x = -x; \
1762 ad69471c pbrook
    }} while (0)
1763 ad69471c pbrook
uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x)
1764 ad69471c pbrook
{
1765 ad69471c pbrook
    neon_s16 vec;
1766 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1767 ad69471c pbrook
    DO_QNEG16(vec.v1);
1768 ad69471c pbrook
    DO_QNEG16(vec.v2);
1769 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1770 ad69471c pbrook
    return x;
1771 ad69471c pbrook
}
1772 ad69471c pbrook
#undef DO_QNEG16
1773 ad69471c pbrook
1774 ad69471c pbrook
/*
 * Saturating absolute value of x interpreted as a signed 32-bit value.
 * The most negative value yields ~SIGNBIT and invokes SET_QC(); other
 * negative values are negated; non-negative values pass through.
 */
uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        return ~SIGNBIT;
    }
    if ((int32_t)x < 0) {
        return -x;
    }
    return x;
}
1784 ad69471c pbrook
1785 ad69471c pbrook
/*
 * Saturating negation of x interpreted as a signed 32-bit value.
 * The most negative value yields ~SIGNBIT and invokes SET_QC(); every
 * other value is negated.
 */
uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x)
{
    if (x != SIGNBIT) {
        return -x;
    }
    SET_QC();
    return ~SIGNBIT;
}
1795 ad69471c pbrook
1796 ad69471c pbrook
/* NEON Float helpers.  */
1797 ad69471c pbrook
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b)
1798 ad69471c pbrook
{
1799 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1800 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1801 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b;
1802 ad69471c pbrook
}
1803 ad69471c pbrook
1804 ad69471c pbrook
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b)
1805 ad69471c pbrook
{
1806 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1807 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1808 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b;
1809 ad69471c pbrook
}
1810 ad69471c pbrook
1811 ad69471c pbrook
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b)
1812 ad69471c pbrook
{
1813 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1814 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1815 ad69471c pbrook
    return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1)
1816 ad69471c pbrook
                    ? float32_sub(f0, f1, NFS)
1817 ad69471c pbrook
                    : float32_sub(f1, f0, NFS));
1818 ad69471c pbrook
}
1819 ad69471c pbrook
1820 ad69471c pbrook
uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b)
1821 ad69471c pbrook
{
1822 ad69471c pbrook
    return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
1823 ad69471c pbrook
}
1824 ad69471c pbrook
1825 ad69471c pbrook
uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b)
1826 ad69471c pbrook
{
1827 ad69471c pbrook
    return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
1828 ad69471c pbrook
}
1829 ad69471c pbrook
1830 ad69471c pbrook
uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
1831 ad69471c pbrook
{
1832 ad69471c pbrook
    return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
1833 ad69471c pbrook
}
1834 ad69471c pbrook
1835 ad69471c pbrook
/* Floating point comparisons produce an integer result.  */
1836 ad69471c pbrook
#define NEON_VOP_FCMP(name, cmp) \
1837 ad69471c pbrook
uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \
1838 ad69471c pbrook
{ \
1839 ad69471c pbrook
    if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \
1840 ad69471c pbrook
        return ~0; \
1841 ad69471c pbrook
    else \
1842 ad69471c pbrook
        return 0; \
1843 ad69471c pbrook
}
1844 ad69471c pbrook
1845 ad69471c pbrook
NEON_VOP_FCMP(ceq_f32, ==)
1846 ad69471c pbrook
NEON_VOP_FCMP(cge_f32, >=)
1847 ad69471c pbrook
NEON_VOP_FCMP(cgt_f32, >)
1848 ad69471c pbrook
1849 ad69471c pbrook
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
1850 ad69471c pbrook
{
1851 ad69471c pbrook
    float32 f0 = float32_abs(vfp_itos(a));
1852 ad69471c pbrook
    float32 f1 = float32_abs(vfp_itos(b));
1853 ad69471c pbrook
    return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0;
1854 ad69471c pbrook
}
1855 ad69471c pbrook
1856 ad69471c pbrook
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
1857 ad69471c pbrook
{
1858 ad69471c pbrook
    float32 f0 = float32_abs(vfp_itos(a));
1859 ad69471c pbrook
    float32 f1 = float32_abs(vfp_itos(b));
1860 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0;
1861 ad69471c pbrook
}
1862 02acedf9 Peter Maydell
1863 02acedf9 Peter Maydell
/*
 * Extract element N (counted from the least significant end) of width
 * SIZE bits from V.  SIZE must be less than 64 because the mask is built
 * from 1ull << SIZE.
 */
#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
1864 02acedf9 Peter Maydell
1865 02acedf9 Peter Maydell
/*
 * De-interleave 8-bit elements across two register pairs.
 * Reads env->vfp.regs[rd], [rd + 1], [rm] and [rm + 1], then writes:
 *   regs[rd]     = even-numbered bytes of old regs[rd], regs[rd + 1]
 *   regs[rd + 1] = even-numbered bytes of old regs[rm], regs[rm + 1]
 *   regs[rm]     = odd-numbered bytes of old regs[rd], regs[rd + 1]
 *   regs[rm + 1] = odd-numbered bytes of old regs[rm], regs[rm + 1]
 * The rm registers are written before the rd registers.
 */
void HELPER(neon_qunzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t even_d = 0;
    uint64_t even_m = 0;
    uint64_t odd_d = 0;
    uint64_t odd_m = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int lo_pos = 8 * i;
        int hi_pos = 32 + 8 * i;

        even_d |= (ELEM(d_lo, 2 * i, 8) << lo_pos)
            | (ELEM(d_hi, 2 * i, 8) << hi_pos);
        even_m |= (ELEM(m_lo, 2 * i, 8) << lo_pos)
            | (ELEM(m_hi, 2 * i, 8) << hi_pos);
        odd_d |= (ELEM(d_lo, 2 * i + 1, 8) << lo_pos)
            | (ELEM(d_hi, 2 * i + 1, 8) << hi_pos);
        odd_m |= (ELEM(m_lo, 2 * i + 1, 8) << lo_pos)
            | (ELEM(m_hi, 2 * i + 1, 8) << hi_pos);
    }
    env->vfp.regs[rm] = make_float64(odd_d);
    env->vfp.regs[rm + 1] = make_float64(odd_m);
    env->vfp.regs[rd] = make_float64(even_d);
    env->vfp.regs[rd + 1] = make_float64(even_m);
}
1892 02acedf9 Peter Maydell
1893 02acedf9 Peter Maydell
/*
 * De-interleave 16-bit elements across two register pairs.
 * Reads env->vfp.regs[rd], [rd + 1], [rm] and [rm + 1], then writes:
 *   regs[rd]     = even-numbered elements of old regs[rd], regs[rd + 1]
 *   regs[rd + 1] = even-numbered elements of old regs[rm], regs[rm + 1]
 *   regs[rm]     = odd-numbered elements of old regs[rd], regs[rd + 1]
 *   regs[rm + 1] = odd-numbered elements of old regs[rm], regs[rm + 1]
 * The rm registers are written before the rd registers.
 */
void HELPER(neon_qunzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t even_d = 0;
    uint64_t even_m = 0;
    uint64_t odd_d = 0;
    uint64_t odd_m = 0;
    int i;

    for (i = 0; i < 2; i++) {
        int lo_pos = 16 * i;
        int hi_pos = 32 + 16 * i;

        even_d |= (ELEM(d_lo, 2 * i, 16) << lo_pos)
            | (ELEM(d_hi, 2 * i, 16) << hi_pos);
        even_m |= (ELEM(m_lo, 2 * i, 16) << lo_pos)
            | (ELEM(m_hi, 2 * i, 16) << hi_pos);
        odd_d |= (ELEM(d_lo, 2 * i + 1, 16) << lo_pos)
            | (ELEM(d_hi, 2 * i + 1, 16) << hi_pos);
        odd_m |= (ELEM(m_lo, 2 * i + 1, 16) << lo_pos)
            | (ELEM(m_hi, 2 * i + 1, 16) << hi_pos);
    }
    env->vfp.regs[rm] = make_float64(odd_d);
    env->vfp.regs[rm + 1] = make_float64(odd_m);
    env->vfp.regs[rd] = make_float64(even_d);
    env->vfp.regs[rd + 1] = make_float64(even_m);
}
1912 02acedf9 Peter Maydell
1913 02acedf9 Peter Maydell
/*
 * De-interleave 32-bit elements across two register pairs.
 * Reads env->vfp.regs[rd], [rd + 1], [rm] and [rm + 1], then writes:
 *   regs[rd]     = low words of old regs[rd], regs[rd + 1]
 *   regs[rd + 1] = low words of old regs[rm], regs[rm + 1]
 *   regs[rm]     = high words of old regs[rd], regs[rd + 1]
 *   regs[rm + 1] = high words of old regs[rm], regs[rm + 1]
 * The rm registers are written before the rd registers.
 */
void HELPER(neon_qunzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t low_word = 0x00000000ffffffffull;
    const uint64_t high_word = 0xffffffff00000000ull;
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t even_d = (d_lo & low_word) | (d_hi << 32);
    uint64_t even_m = (m_lo & low_word) | (m_hi << 32);
    uint64_t odd_d = (d_lo >> 32) | (d_hi & high_word);
    uint64_t odd_m = (m_lo >> 32) | (m_hi & high_word);

    env->vfp.regs[rm] = make_float64(odd_d);
    env->vfp.regs[rm + 1] = make_float64(odd_m);
    env->vfp.regs[rd] = make_float64(even_d);
    env->vfp.regs[rd + 1] = make_float64(even_m);
}
1928 02acedf9 Peter Maydell
1929 02acedf9 Peter Maydell
/*
 * De-interleave 8-bit elements of two single registers.
 * Reads env->vfp.regs[rd] and env->vfp.regs[rm], then writes:
 *   regs[rd] = even-numbered bytes of old regs[rd] (low half) followed
 *              by even-numbered bytes of old regs[rm] (high half)
 *   regs[rm] = odd-numbered bytes, arranged the same way
 * regs[rm] is written before regs[rd].
 */
void HELPER(neon_unzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t even = 0;
    uint64_t odd = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int lo_pos = 8 * i;
        int hi_pos = 32 + 8 * i;

        even |= (ELEM(src_d, 2 * i, 8) << lo_pos)
            | (ELEM(src_m, 2 * i, 8) << hi_pos);
        odd |= (ELEM(src_d, 2 * i + 1, 8) << lo_pos)
            | (ELEM(src_m, 2 * i + 1, 8) << hi_pos);
    }
    env->vfp.regs[rm] = make_float64(odd);
    env->vfp.regs[rd] = make_float64(even);
}
1944 02acedf9 Peter Maydell
1945 02acedf9 Peter Maydell
/*
 * De-interleave 16-bit elements of two single registers.
 * Reads env->vfp.regs[rd] and env->vfp.regs[rm], then writes:
 *   regs[rd] = even-numbered elements of old regs[rd] (low half)
 *              followed by even-numbered elements of old regs[rm]
 *   regs[rm] = odd-numbered elements, arranged the same way
 * regs[rm] is written before regs[rd].
 */
void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t even = 0;
    uint64_t odd = 0;
    int i;

    for (i = 0; i < 2; i++) {
        int lo_pos = 16 * i;
        int hi_pos = 32 + 16 * i;

        even |= (ELEM(src_d, 2 * i, 16) << lo_pos)
            | (ELEM(src_m, 2 * i, 16) << hi_pos);
        odd |= (ELEM(src_d, 2 * i + 1, 16) << lo_pos)
            | (ELEM(src_m, 2 * i + 1, 16) << hi_pos);
    }
    env->vfp.regs[rm] = make_float64(odd);
    env->vfp.regs[rd] = make_float64(even);
}
1956 d68a6f3a Peter Maydell
1957 d68a6f3a Peter Maydell
/*
 * Interleave 8-bit ELEM() picks across the register pairs rd/rd+1 and
 * rm/rm+1.  Each output word alternates a pick from a "d" source (even
 * byte positions) with the same-numbered pick from the matching "m"
 * source (odd byte positions):
 *   rd     <- picks 0..3 of old rd   and old rm
 *   rd + 1 <- picks 4..7 of old rd   and old rm
 *   rm     <- picks 0..3 of old rd+1 and old rm+1
 *   rm + 1 <- picks 4..7 of old rd+1 and old rm+1
 * NOTE(review): ELEM() is defined outside this block; it is assumed to
 * extract element N of width SIZE bits from V -- confirm.
 */
void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t new_d0 = 0;
    uint64_t new_d1 = 0;
    uint64_t new_m0 = 0;
    uint64_t new_m1 = 0;
    int k;

    for (k = 0; k < 4; k++) {
        const int first = k;          /* pick index in the lower four */
        const int second = k + 4;     /* pick index in the upper four */
        const int d_shift = 16 * k;   /* slot for the "d" source pick */
        const int m_shift = d_shift + 8;

        new_d0 |= (ELEM(d_lo, first, 8) << d_shift)
            | (ELEM(m_lo, first, 8) << m_shift);
        new_d1 |= (ELEM(d_lo, second, 8) << d_shift)
            | (ELEM(m_lo, second, 8) << m_shift);
        new_m0 |= (ELEM(d_hi, first, 8) << d_shift)
            | (ELEM(m_hi, first, 8) << m_shift);
        new_m1 |= (ELEM(d_hi, second, 8) << d_shift)
            | (ELEM(m_hi, second, 8) << m_shift);
    }

    /* Store order is kept: rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m0);
    env->vfp.regs[rm + 1] = make_float64(new_m1);
    env->vfp.regs[rd] = make_float64(new_d0);
    env->vfp.regs[rd + 1] = make_float64(new_d1);
}
1984 d68a6f3a Peter Maydell
1985 d68a6f3a Peter Maydell
/*
 * Interleave 16-bit ELEM() picks across the register pairs rd/rd+1 and
 * rm/rm+1.  Each output word alternates a pick from a "d" source with the
 * same-numbered pick from the matching "m" source:
 *   rd     <- picks 0..1 of old rd   and old rm
 *   rd + 1 <- picks 2..3 of old rd   and old rm
 *   rm     <- picks 0..1 of old rd+1 and old rm+1
 *   rm + 1 <- picks 2..3 of old rd+1 and old rm+1
 * NOTE(review): ELEM() is defined outside this block; it is assumed to
 * extract element N of width SIZE bits from V -- confirm.
 */
void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t new_d0 = 0;
    uint64_t new_d1 = 0;
    uint64_t new_m0 = 0;
    uint64_t new_m1 = 0;
    int k;

    for (k = 0; k < 2; k++) {
        const int first = k;          /* pick index in the lower two */
        const int second = k + 2;     /* pick index in the upper two */
        const int d_shift = 32 * k;   /* slot for the "d" source pick */
        const int m_shift = d_shift + 16;

        new_d0 |= (ELEM(d_lo, first, 16) << d_shift)
            | (ELEM(m_lo, first, 16) << m_shift);
        new_d1 |= (ELEM(d_lo, second, 16) << d_shift)
            | (ELEM(m_lo, second, 16) << m_shift);
        new_m0 |= (ELEM(d_hi, first, 16) << d_shift)
            | (ELEM(m_hi, first, 16) << m_shift);
        new_m1 |= (ELEM(d_hi, second, 16) << d_shift)
            | (ELEM(m_hi, second, 16) << m_shift);
    }

    /* Store order is kept: rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m0);
    env->vfp.regs[rm + 1] = make_float64(new_m1);
    env->vfp.regs[rd] = make_float64(new_d0);
    env->vfp.regs[rd + 1] = make_float64(new_d1);
}
2004 d68a6f3a Peter Maydell
2005 d68a6f3a Peter Maydell
/*
 * Interleave 32-bit ELEM() picks across the register pairs rd/rd+1 and
 * rm/rm+1.  Every output word has a "d" source pick in bits 0..31 and the
 * same-numbered "m" source pick in bits 32..63:
 *   rd     <- pick 0 of old rd   and old rm
 *   rd + 1 <- pick 1 of old rd   and old rm
 *   rm     <- pick 0 of old rd+1 and old rm+1
 *   rm + 1 <- pick 1 of old rd+1 and old rm+1
 * NOTE(review): ELEM() is defined outside this block; it is assumed to
 * extract element N of width SIZE bits from V -- confirm.
 */
void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t new_d0;
    uint64_t new_d1;
    uint64_t new_m0;
    uint64_t new_m1;

    /* Low Q halves feed the new rd pair. */
    new_d0 = (ELEM(m_lo, 0, 32) << 32) | ELEM(d_lo, 0, 32);
    new_d1 = (ELEM(m_lo, 1, 32) << 32) | ELEM(d_lo, 1, 32);

    /* High Q halves feed the new rm pair. */
    new_m0 = (ELEM(m_hi, 0, 32) << 32) | ELEM(d_hi, 0, 32);
    new_m1 = (ELEM(m_hi, 1, 32) << 32) | ELEM(d_hi, 1, 32);

    /* Store order is kept: rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(new_m0);
    env->vfp.regs[rm + 1] = make_float64(new_m1);
    env->vfp.regs[rd] = make_float64(new_d0);
    env->vfp.regs[rd + 1] = make_float64(new_d1);
}
2020 d68a6f3a Peter Maydell
2021 d68a6f3a Peter Maydell
/*
 * Interleave 8-bit ELEM() picks of registers rd and rm.  Each result
 * alternates a pick from the old rd (even byte positions) with the
 * same-numbered pick from the old rm (odd byte positions): rd receives
 * picks 0..3 and rm receives picks 4..7.
 * NOTE(review): ELEM() is defined outside this block; it is assumed to
 * extract element N of width SIZE bits from V -- confirm.
 */
void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t old_m = float64_val(env->vfp.regs[rm]);
    const uint64_t old_d = float64_val(env->vfp.regs[rd]);
    uint64_t lower = 0;
    uint64_t upper = 0;
    int k;

    for (k = 0; k < 4; k++) {
        const int first = k;          /* pick index in the lower four */
        const int second = k + 4;     /* pick index in the upper four */
        const int d_shift = 16 * k;   /* slot for the pick from old rd */
        const int m_shift = d_shift + 8;

        lower |= (ELEM(old_d, first, 8) << d_shift)
            | (ELEM(old_m, first, 8) << m_shift);
        upper |= (ELEM(old_d, second, 8) << d_shift)
            | (ELEM(old_m, second, 8) << m_shift);
    }

    /* rm is stored before rd, so rd's value wins if both name one register. */
    env->vfp.regs[rm] = make_float64(upper);
    env->vfp.regs[rd] = make_float64(lower);
}
2036 d68a6f3a Peter Maydell
2037 d68a6f3a Peter Maydell
/*
 * Interleave 16-bit ELEM() picks of registers rd and rm.  Each result
 * alternates a pick from the old rd with the same-numbered pick from the
 * old rm: rd receives picks 0..1 and rm receives picks 2..3.
 * NOTE(review): ELEM() is defined outside this block; it is assumed to
 * extract element N of width SIZE bits from V -- confirm.
 */
void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t old_m = float64_val(env->vfp.regs[rm]);
    const uint64_t old_d = float64_val(env->vfp.regs[rd]);
    uint64_t lower = 0;
    uint64_t upper = 0;
    int k;

    for (k = 0; k < 2; k++) {
        const int first = k;          /* pick index in the lower two */
        const int second = k + 2;     /* pick index in the upper two */
        const int d_shift = 32 * k;   /* slot for the pick from old rd */
        const int m_shift = d_shift + 16;

        lower |= (ELEM(old_d, first, 16) << d_shift)
            | (ELEM(old_m, first, 16) << m_shift);
        upper |= (ELEM(old_d, second, 16) << d_shift)
            | (ELEM(old_m, second, 16) << m_shift);
    }

    /* Same store order as before: rm first, then rd. */
    env->vfp.regs[rm] = make_float64(upper);
    env->vfp.regs[rd] = make_float64(lower);
}