Statistics
| Branch: | Revision:

root / target-arm / neon_helper.c @ feature-archipelago

History | View | Annotate | Download (53.2 kB)

1 e677137d pbrook
/*
2 e677137d pbrook
 * ARM NEON vector operations.
3 e677137d pbrook
 *
4 e677137d pbrook
 * Copyright (c) 2007, 2008 CodeSourcery.
5 e677137d pbrook
 * Written by Paul Brook
6 e677137d pbrook
 *
7 8e31bf38 Matthew Fernandez
 * This code is licensed under the GNU GPL v2.
8 e677137d pbrook
 */
9 ad69471c pbrook
#include <stdlib.h>
10 ad69471c pbrook
#include <stdio.h>
11 ad69471c pbrook
12 ad69471c pbrook
#include "cpu.h"
13 022c62cb Paolo Bonzini
#include "exec/exec-all.h"
14 7b59220e Lluís
#include "helper.h"
15 ad69471c pbrook
16 ad69471c pbrook
/* Sign-bit masks for 32-bit and 64-bit values.  Both expansions are fully
   parenthesized so they can be used next to any operator.  */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/* Record that a saturation occurred by OR-ing CPSR_Q into
   env->vfp.xregs[ARM_VFP_FPSCR].  Requires a variable named `env` in
   scope.  The expansion is one parenthesized expression, so it is safe
   both as a statement and inside a larger expression.  */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
20 ad69471c pbrook
21 ad69471c pbrook
/* Lane-structure views of one 32-bit chunk.  Each neon_<name> type holds
   1, 2 or 4 lanes named v1..v4.  When HOST_WORDS_BIGENDIAN is defined the
   members are declared in reverse order, otherwise in ascending order.  */
#define NEON_TYPE1(name, type) \
typedef struct { type v1; } neon_##name;

#ifdef HOST_WORDS_BIGENDIAN
#define NEON_TYPE2(name, type) \
typedef struct { type v2; type v1; } neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct { type v4; type v3; type v2; type v1; } neon_##name;
#else
#define NEON_TYPE2(name, type) \
typedef struct { type v1; type v2; } neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct { type v1; type v2; type v3; type v4; } neon_##name;
#endif

/* Four 8-bit lanes.  */
NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
/* Two 16-bit lanes.  */
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
/* One 32-bit lane.  */
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)

/* The generator macros are not needed past this point.  */
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
67 ad69471c pbrook
68 ad69471c pbrook
/* Reinterpret the 32-bit integer `val` as a lane structure of type
   `vtype` (through a union) and assign the result to `dest`.  */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } neon_pun; \
    neon_pun.word = (val); \
    dest = neon_pun.lanes; \
    } while(0)
77 ad69471c pbrook
78 ad69471c pbrook
/* Reinterpret the lane structure `val` (of type `vtype`) as a 32-bit
   integer (through a union) and assign the result to `dest`.  */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } neon_pun; \
    neon_pun.lanes = (val); \
    dest = neon_pun.word; \
    } while(0)
87 ad69471c pbrook
88 ad69471c pbrook
/* Apply the currently defined NEON_FN to corresponding lanes of vsrc1 and
   vsrc2, storing into vdest.  Each wider variant extends the narrower one
   with the additional lanes, so the lanes are processed in order v1..v4.  */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
98 ad69471c pbrook
99 ad69471c pbrook
/* Shared function body for two-operand lane-wise helpers: unpack `arg1`
   and `arg2` into lane structures of type `vtype`, run NEON_DO<n> (which
   applies the currently defined NEON_FN to each lane) and return the
   repacked 32-bit result.  */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}

/* Define HELPER(neon_<name>) taking the two packed operands only.  */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* As NEON_VOP, but the helper additionally receives `env`, which makes
   SET_QC() usable inside NEON_FN.  */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
119 02da0b2d Peter Maydell
120 ad69471c pbrook
/* Pairwise operations.  */
/* For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same.  */
/* NEON_FN is applied to adjacent lanes: the low result lanes come from
   pairs within vsrc1, the high result lanes from pairs within vsrc2.
   The last line of each macro deliberately has no trailing backslash, so
   the definition ends there and cannot absorb whatever line follows.  */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);
131 ad69471c pbrook
132 ad69471c pbrook
/* Define HELPER(neon_<name>) as a pairwise operation: both packed operands
   are unpacked to `vtype`, NEON_PDO<n> combines adjacent lanes with the
   currently defined NEON_FN, and the repacked result is returned.  */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1, vsrc2, vdest; \
    uint32_t res; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, res, vdest); \
    return res; \
}
145 ad69471c pbrook
146 ad69471c pbrook
/* Unary operators.  */
/* Define HELPER(neon_<name>) taking one packed operand.  The operand is
   unpacked into vsrc1, NEON_DO<n> applies the currently defined NEON_FN,
   and `arg` is reused to hold the repacked result that is returned.  */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1, vdest; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, arg, vdest); \
    return arg; \
}
157 ad69471c pbrook
158 ad69471c pbrook
159 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
160 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
161 ad69471c pbrook
    if (tmp != (type)tmp) { \
162 ad69471c pbrook
        SET_QC(); \
163 ad69471c pbrook
        dest = ~0; \
164 ad69471c pbrook
    } else { \
165 ad69471c pbrook
        dest = tmp; \
166 ad69471c pbrook
    }} while(0)
167 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
168 02da0b2d Peter Maydell
NEON_VOP_ENV(qadd_u8, neon_u8, 4)
169 ad69471c pbrook
#undef NEON_FN
170 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
171 02da0b2d Peter Maydell
NEON_VOP_ENV(qadd_u16, neon_u16, 2)
172 ad69471c pbrook
#undef NEON_FN
173 ad69471c pbrook
#undef NEON_USAT
174 ad69471c pbrook
175 0ecb72a5 Andreas Färber
/* Unsigned saturating 32-bit add.  On wraparound SET_QC() is invoked and
   all-ones is returned; otherwise the plain sum is returned.  */
uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;

    /* An unsigned sum that is not below its first operand did not wrap.  */
    if (sum >= a) {
        return sum;
    }
    SET_QC();
    return ~(uint32_t)0;
}
184 72902672 Christophe Lyon
185 0ecb72a5 Andreas Färber
/* Unsigned saturating 64-bit add.  On wraparound SET_QC() is invoked and
   all-ones is returned; otherwise the plain sum is returned.  */
uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;

    /* A wrapped unsigned sum is smaller than either operand.  */
    if (sum < src1) {
        SET_QC();
        return ~(uint64_t)0;
    }
    return sum;
}
196 72902672 Christophe Lyon
197 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
198 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
199 ad69471c pbrook
    if (tmp != (type)tmp) { \
200 ad69471c pbrook
        SET_QC(); \
201 ad69471c pbrook
        if (src2 > 0) { \
202 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
203 ad69471c pbrook
        } else { \
204 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
205 ad69471c pbrook
        } \
206 ad69471c pbrook
    } \
207 ad69471c pbrook
    dest = tmp; \
208 ad69471c pbrook
    } while(0)
209 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
210 02da0b2d Peter Maydell
NEON_VOP_ENV(qadd_s8, neon_s8, 4)
211 ad69471c pbrook
#undef NEON_FN
212 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
213 02da0b2d Peter Maydell
NEON_VOP_ENV(qadd_s16, neon_s16, 2)
214 ad69471c pbrook
#undef NEON_FN
215 ad69471c pbrook
#undef NEON_SSAT
216 ad69471c pbrook
217 0ecb72a5 Andreas Färber
/* Signed saturating 32-bit add on operands passed as raw uint32_t bit
   patterns.  On overflow SET_QC() is invoked and the result is clamped:
   0x80000000 when `a` has its sign bit set, 0x7fffffff otherwise.  */
uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;
    /* Overflow needs operands of equal sign and a result whose sign
       differs from them.  */
    int sign_flipped = ((sum ^ a) & SIGNBIT) != 0;
    int same_sign = ((a ^ b) & SIGNBIT) == 0;

    if (sign_flipped && same_sign) {
        SET_QC();
        sum = (a & SIGNBIT) ? SIGNBIT : ~SIGNBIT;
    }
    return sum;
}
226 72902672 Christophe Lyon
227 0ecb72a5 Andreas Färber
/* Signed saturating 64-bit add on operands passed as raw uint64_t bit
   patterns.  On overflow SET_QC() is invoked and the result is clamped:
   SIGNBIT64 when `src1` has its sign bit set, ~SIGNBIT64 otherwise.  */
uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;
    /* Overflow needs operands of equal sign and a result whose sign
       differs from them.  */
    int sign_flipped = ((sum ^ src1) & SIGNBIT64) != 0;
    int same_sign = ((src1 ^ src2) & SIGNBIT64) == 0;

    if (sign_flipped && same_sign) {
        SET_QC();
        sum = (src1 & SIGNBIT64) ? SIGNBIT64 : ~SIGNBIT64;
    }
    return sum;
}
238 72902672 Christophe Lyon
239 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
240 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
241 ad69471c pbrook
    if (tmp != (type)tmp) { \
242 ad69471c pbrook
        SET_QC(); \
243 ad69471c pbrook
        dest = 0; \
244 ad69471c pbrook
    } else { \
245 ad69471c pbrook
        dest = tmp; \
246 ad69471c pbrook
    }} while(0)
247 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
248 02da0b2d Peter Maydell
NEON_VOP_ENV(qsub_u8, neon_u8, 4)
249 ad69471c pbrook
#undef NEON_FN
250 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
251 02da0b2d Peter Maydell
NEON_VOP_ENV(qsub_u16, neon_u16, 2)
252 ad69471c pbrook
#undef NEON_FN
253 ad69471c pbrook
#undef NEON_USAT
254 ad69471c pbrook
255 0ecb72a5 Andreas Färber
/* Unsigned saturating 32-bit subtract.  When `b` exceeds `a`, SET_QC()
   is invoked and 0 is returned; otherwise a - b is returned.  */
uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    /* The difference would wrap exactly when the subtrahend is larger.  */
    if (b > a) {
        SET_QC();
        return 0;
    }
    return a - b;
}
264 72902672 Christophe Lyon
265 0ecb72a5 Andreas Färber
/* Unsigned saturating 64-bit subtract.  When `src2` exceeds `src1`,
   SET_QC() is invoked and 0 is returned; otherwise src1 - src2.  */
uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    if (src1 >= src2) {
        return src1 - src2;
    }
    SET_QC();
    return 0;
}
277 72902672 Christophe Lyon
278 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
279 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
280 ad69471c pbrook
    if (tmp != (type)tmp) { \
281 ad69471c pbrook
        SET_QC(); \
282 ad69471c pbrook
        if (src2 < 0) { \
283 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
284 ad69471c pbrook
        } else { \
285 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
286 ad69471c pbrook
        } \
287 ad69471c pbrook
    } \
288 ad69471c pbrook
    dest = tmp; \
289 ad69471c pbrook
    } while(0)
290 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
291 02da0b2d Peter Maydell
NEON_VOP_ENV(qsub_s8, neon_s8, 4)
292 ad69471c pbrook
#undef NEON_FN
293 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
294 02da0b2d Peter Maydell
NEON_VOP_ENV(qsub_s16, neon_s16, 2)
295 ad69471c pbrook
#undef NEON_FN
296 ad69471c pbrook
#undef NEON_SSAT
297 ad69471c pbrook
298 0ecb72a5 Andreas Färber
/* Signed saturating 32-bit subtract on operands passed as raw uint32_t
   bit patterns.  On overflow SET_QC() is invoked and the result is
   clamped: 0x80000000 when `a` has its sign bit set, else 0x7fffffff.  */
uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t diff = a - b;
    /* Overflow needs operands of opposite sign and a result whose sign
       differs from the minuend.  */
    int sign_flipped = ((diff ^ a) & SIGNBIT) != 0;
    int opposite_sign = ((a ^ b) & SIGNBIT) != 0;

    if (sign_flipped && opposite_sign) {
        SET_QC();
        diff = (a & SIGNBIT) ? SIGNBIT : ~SIGNBIT;
    }
    return diff;
}
307 72902672 Christophe Lyon
308 0ecb72a5 Andreas Färber
/* Signed saturating 64-bit subtract on operands passed as raw uint64_t
   bit patterns.  On overflow SET_QC() is invoked and the result is
   clamped: SIGNBIT64 when `src1` has its sign bit set, else ~SIGNBIT64.  */
uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t diff = src1 - src2;
    /* Overflow needs operands of opposite sign and a result whose sign
       differs from the minuend.  */
    int sign_flipped = ((diff ^ src1) & SIGNBIT64) != 0;
    int opposite_sign = ((src1 ^ src2) & SIGNBIT64) != 0;

    if (sign_flipped && opposite_sign) {
        SET_QC();
        diff = (src1 & SIGNBIT64) ? SIGNBIT64 : ~SIGNBIT64;
    }
    return diff;
}
319 72902672 Christophe Lyon
320 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
321 ad69471c pbrook
NEON_VOP(hadd_s8, neon_s8, 4)
322 ad69471c pbrook
NEON_VOP(hadd_u8, neon_u8, 4)
323 ad69471c pbrook
NEON_VOP(hadd_s16, neon_s16, 2)
324 ad69471c pbrook
NEON_VOP(hadd_u16, neon_u16, 2)
325 ad69471c pbrook
#undef NEON_FN
326 ad69471c pbrook
327 ad69471c pbrook
int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2)
328 ad69471c pbrook
{
329 ad69471c pbrook
    int32_t dest;
330 ad69471c pbrook
331 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
332 ad69471c pbrook
    if (src1 & src2 & 1)
333 ad69471c pbrook
        dest++;
334 ad69471c pbrook
    return dest;
335 ad69471c pbrook
}
336 ad69471c pbrook
337 ad69471c pbrook
uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2)
338 ad69471c pbrook
{
339 ad69471c pbrook
    uint32_t dest;
340 ad69471c pbrook
341 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
342 ad69471c pbrook
    if (src1 & src2 & 1)
343 ad69471c pbrook
        dest++;
344 ad69471c pbrook
    return dest;
345 ad69471c pbrook
}
346 ad69471c pbrook
347 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1
348 ad69471c pbrook
NEON_VOP(rhadd_s8, neon_s8, 4)
349 ad69471c pbrook
NEON_VOP(rhadd_u8, neon_u8, 4)
350 ad69471c pbrook
NEON_VOP(rhadd_s16, neon_s16, 2)
351 ad69471c pbrook
NEON_VOP(rhadd_u16, neon_u16, 2)
352 ad69471c pbrook
#undef NEON_FN
353 ad69471c pbrook
354 ad69471c pbrook
int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2)
355 ad69471c pbrook
{
356 ad69471c pbrook
    int32_t dest;
357 ad69471c pbrook
358 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
359 ad69471c pbrook
    if ((src1 | src2) & 1)
360 ad69471c pbrook
        dest++;
361 ad69471c pbrook
    return dest;
362 ad69471c pbrook
}
363 ad69471c pbrook
364 ad69471c pbrook
uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2)
365 ad69471c pbrook
{
366 ad69471c pbrook
    uint32_t dest;
367 ad69471c pbrook
368 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
369 ad69471c pbrook
    if ((src1 | src2) & 1)
370 ad69471c pbrook
        dest++;
371 ad69471c pbrook
    return dest;
372 ad69471c pbrook
}
373 ad69471c pbrook
374 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1
375 ad69471c pbrook
NEON_VOP(hsub_s8, neon_s8, 4)
376 ad69471c pbrook
NEON_VOP(hsub_u8, neon_u8, 4)
377 ad69471c pbrook
NEON_VOP(hsub_s16, neon_s16, 2)
378 ad69471c pbrook
NEON_VOP(hsub_u16, neon_u16, 2)
379 ad69471c pbrook
#undef NEON_FN
380 ad69471c pbrook
381 ad69471c pbrook
int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2)
382 ad69471c pbrook
{
383 ad69471c pbrook
    int32_t dest;
384 ad69471c pbrook
385 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
386 ad69471c pbrook
    if ((~src1) & src2 & 1)
387 ad69471c pbrook
        dest--;
388 ad69471c pbrook
    return dest;
389 ad69471c pbrook
}
390 ad69471c pbrook
391 ad69471c pbrook
uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
392 ad69471c pbrook
{
393 ad69471c pbrook
    uint32_t dest;
394 ad69471c pbrook
395 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
396 ad69471c pbrook
    if ((~src1) & src2 & 1)
397 ad69471c pbrook
        dest--;
398 ad69471c pbrook
    return dest;
399 ad69471c pbrook
}
400 ad69471c pbrook
401 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
402 ad69471c pbrook
NEON_VOP(cgt_s8, neon_s8, 4)
403 ad69471c pbrook
NEON_VOP(cgt_u8, neon_u8, 4)
404 ad69471c pbrook
NEON_VOP(cgt_s16, neon_s16, 2)
405 ad69471c pbrook
NEON_VOP(cgt_u16, neon_u16, 2)
406 ad69471c pbrook
NEON_VOP(cgt_s32, neon_s32, 1)
407 ad69471c pbrook
NEON_VOP(cgt_u32, neon_u32, 1)
408 ad69471c pbrook
#undef NEON_FN
409 ad69471c pbrook
410 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
411 ad69471c pbrook
NEON_VOP(cge_s8, neon_s8, 4)
412 ad69471c pbrook
NEON_VOP(cge_u8, neon_u8, 4)
413 ad69471c pbrook
NEON_VOP(cge_s16, neon_s16, 2)
414 ad69471c pbrook
NEON_VOP(cge_u16, neon_u16, 2)
415 ad69471c pbrook
NEON_VOP(cge_s32, neon_s32, 1)
416 ad69471c pbrook
NEON_VOP(cge_u32, neon_u32, 1)
417 ad69471c pbrook
#undef NEON_FN
418 ad69471c pbrook
419 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
420 ad69471c pbrook
NEON_VOP(min_s8, neon_s8, 4)
421 ad69471c pbrook
NEON_VOP(min_u8, neon_u8, 4)
422 ad69471c pbrook
NEON_VOP(min_s16, neon_s16, 2)
423 ad69471c pbrook
NEON_VOP(min_u16, neon_u16, 2)
424 ad69471c pbrook
NEON_VOP(min_s32, neon_s32, 1)
425 ad69471c pbrook
NEON_VOP(min_u32, neon_u32, 1)
426 ad69471c pbrook
NEON_POP(pmin_s8, neon_s8, 4)
427 ad69471c pbrook
NEON_POP(pmin_u8, neon_u8, 4)
428 ad69471c pbrook
NEON_POP(pmin_s16, neon_s16, 2)
429 ad69471c pbrook
NEON_POP(pmin_u16, neon_u16, 2)
430 ad69471c pbrook
#undef NEON_FN
431 ad69471c pbrook
432 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
433 ad69471c pbrook
NEON_VOP(max_s8, neon_s8, 4)
434 ad69471c pbrook
NEON_VOP(max_u8, neon_u8, 4)
435 ad69471c pbrook
NEON_VOP(max_s16, neon_s16, 2)
436 ad69471c pbrook
NEON_VOP(max_u16, neon_u16, 2)
437 ad69471c pbrook
NEON_VOP(max_s32, neon_s32, 1)
438 ad69471c pbrook
NEON_VOP(max_u32, neon_u32, 1)
439 ad69471c pbrook
NEON_POP(pmax_s8, neon_s8, 4)
440 ad69471c pbrook
NEON_POP(pmax_u8, neon_u8, 4)
441 ad69471c pbrook
NEON_POP(pmax_s16, neon_s16, 2)
442 ad69471c pbrook
NEON_POP(pmax_u16, neon_u16, 2)
443 ad69471c pbrook
#undef NEON_FN
444 ad69471c pbrook
445 ad69471c pbrook
#define NEON_FN(dest, src1, src2) \
446 ad69471c pbrook
    dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
447 ad69471c pbrook
NEON_VOP(abd_s8, neon_s8, 4)
448 ad69471c pbrook
NEON_VOP(abd_u8, neon_u8, 4)
449 ad69471c pbrook
NEON_VOP(abd_s16, neon_s16, 2)
450 ad69471c pbrook
NEON_VOP(abd_u16, neon_u16, 2)
451 ad69471c pbrook
NEON_VOP(abd_s32, neon_s32, 1)
452 ad69471c pbrook
NEON_VOP(abd_u32, neon_u32, 1)
453 ad69471c pbrook
#undef NEON_FN
454 ad69471c pbrook
455 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
456 ad69471c pbrook
    int8_t tmp; \
457 ad69471c pbrook
    tmp = (int8_t)src2; \
458 50f67e95 Juha Riihimäki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
459 50f67e95 Juha Riihimäki
        tmp <= -(ssize_t)sizeof(src1) * 8) { \
460 ad69471c pbrook
        dest = 0; \
461 ad69471c pbrook
    } else if (tmp < 0) { \
462 ad69471c pbrook
        dest = src1 >> -tmp; \
463 ad69471c pbrook
    } else { \
464 ad69471c pbrook
        dest = src1 << tmp; \
465 ad69471c pbrook
    }} while (0)
466 ad69471c pbrook
NEON_VOP(shl_u8, neon_u8, 4)
467 ad69471c pbrook
NEON_VOP(shl_u16, neon_u16, 2)
468 ad69471c pbrook
NEON_VOP(shl_u32, neon_u32, 1)
469 ad69471c pbrook
#undef NEON_FN
470 ad69471c pbrook
471 ad69471c pbrook
uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
472 ad69471c pbrook
{
473 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
474 ad69471c pbrook
    if (shift >= 64 || shift <= -64) {
475 ad69471c pbrook
        val = 0;
476 ad69471c pbrook
    } else if (shift < 0) {
477 ad69471c pbrook
        val >>= -shift;
478 ad69471c pbrook
    } else {
479 ad69471c pbrook
        val <<= shift;
480 ad69471c pbrook
    }
481 ad69471c pbrook
    return val;
482 ad69471c pbrook
}
483 ad69471c pbrook
484 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
485 ad69471c pbrook
    int8_t tmp; \
486 ad69471c pbrook
    tmp = (int8_t)src2; \
487 50f67e95 Juha Riihimäki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
488 ad69471c pbrook
        dest = 0; \
489 50f67e95 Juha Riihimäki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
490 ad69471c pbrook
        dest = src1 >> (sizeof(src1) * 8 - 1); \
491 ad69471c pbrook
    } else if (tmp < 0) { \
492 ad69471c pbrook
        dest = src1 >> -tmp; \
493 ad69471c pbrook
    } else { \
494 ad69471c pbrook
        dest = src1 << tmp; \
495 ad69471c pbrook
    }} while (0)
496 ad69471c pbrook
NEON_VOP(shl_s8, neon_s8, 4)
497 ad69471c pbrook
NEON_VOP(shl_s16, neon_s16, 2)
498 ad69471c pbrook
NEON_VOP(shl_s32, neon_s32, 1)
499 ad69471c pbrook
#undef NEON_FN
500 ad69471c pbrook
501 ad69471c pbrook
uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
502 ad69471c pbrook
{
503 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
504 ad69471c pbrook
    int64_t val = valop;
505 ad69471c pbrook
    if (shift >= 64) {
506 ad69471c pbrook
        val = 0;
507 ad69471c pbrook
    } else if (shift <= -64) {
508 ad69471c pbrook
        val >>= 63;
509 ad69471c pbrook
    } else if (shift < 0) {
510 ad69471c pbrook
        val >>= -shift;
511 ad69471c pbrook
    } else {
512 ad69471c pbrook
        val <<= shift;
513 ad69471c pbrook
    }
514 ad69471c pbrook
    return val;
515 ad69471c pbrook
}
516 ad69471c pbrook
517 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
518 ad69471c pbrook
    int8_t tmp; \
519 ad69471c pbrook
    tmp = (int8_t)src2; \
520 0670a7b6 Peter Maydell
    if ((tmp >= (ssize_t)sizeof(src1) * 8) \
521 0670a7b6 Peter Maydell
        || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \
522 ad69471c pbrook
        dest = 0; \
523 ad69471c pbrook
    } else if (tmp < 0) { \
524 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
525 ad69471c pbrook
    } else { \
526 ad69471c pbrook
        dest = src1 << tmp; \
527 ad69471c pbrook
    }} while (0)
528 ad69471c pbrook
NEON_VOP(rshl_s8, neon_s8, 4)
529 ad69471c pbrook
NEON_VOP(rshl_s16, neon_s16, 2)
530 ad69471c pbrook
#undef NEON_FN
531 ad69471c pbrook
532 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
533 b90372ad Peter Maydell
 * intermediate 64 bit accumulator.  */
534 4bd4ee07 Christophe Lyon
uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
535 4bd4ee07 Christophe Lyon
{
536 4bd4ee07 Christophe Lyon
    int32_t dest;
537 4bd4ee07 Christophe Lyon
    int32_t val = (int32_t)valop;
538 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
539 4bd4ee07 Christophe Lyon
    if ((shift >= 32) || (shift <= -32)) {
540 4bd4ee07 Christophe Lyon
        dest = 0;
541 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
542 4bd4ee07 Christophe Lyon
        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
543 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
544 4bd4ee07 Christophe Lyon
    } else {
545 4bd4ee07 Christophe Lyon
        dest = val << shift;
546 4bd4ee07 Christophe Lyon
    }
547 4bd4ee07 Christophe Lyon
    return dest;
548 4bd4ee07 Christophe Lyon
}
549 4bd4ee07 Christophe Lyon
550 b90372ad Peter Maydell
/* Handling addition overflow with 64 bit input values is more
551 b90372ad Peter Maydell
 * tricky than with 32 bit values.  */
552 ad69471c pbrook
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
553 ad69471c pbrook
{
554 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
555 ad69471c pbrook
    int64_t val = valop;
556 0670a7b6 Peter Maydell
    if ((shift >= 64) || (shift <= -64)) {
557 ad69471c pbrook
        val = 0;
558 ad69471c pbrook
    } else if (shift < 0) {
559 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
560 4bd4ee07 Christophe Lyon
        if (val == INT64_MAX) {
561 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
562 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
563 4bd4ee07 Christophe Lyon
             * result directly.  */
564 4bd4ee07 Christophe Lyon
            val = 0x4000000000000000LL;
565 4bd4ee07 Christophe Lyon
        } else {
566 4bd4ee07 Christophe Lyon
            val++;
567 4bd4ee07 Christophe Lyon
            val >>= 1;
568 4bd4ee07 Christophe Lyon
        }
569 ad69471c pbrook
    } else {
570 ad69471c pbrook
        val <<= shift;
571 ad69471c pbrook
    }
572 ad69471c pbrook
    return val;
573 ad69471c pbrook
}
574 ad69471c pbrook
575 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
576 ad69471c pbrook
    int8_t tmp; \
577 ad69471c pbrook
    tmp = (int8_t)src2; \
578 50f67e95 Juha Riihimäki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
579 50f67e95 Juha Riihimäki
        tmp < -(ssize_t)sizeof(src1) * 8) { \
580 ad69471c pbrook
        dest = 0; \
581 50f67e95 Juha Riihimäki
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
582 b6c63b98 Christophe Lyon
        dest = src1 >> (-tmp - 1); \
583 ad69471c pbrook
    } else if (tmp < 0) { \
584 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
585 ad69471c pbrook
    } else { \
586 ad69471c pbrook
        dest = src1 << tmp; \
587 ad69471c pbrook
    }} while (0)
588 ad69471c pbrook
NEON_VOP(rshl_u8, neon_u8, 4)
589 ad69471c pbrook
NEON_VOP(rshl_u16, neon_u16, 2)
590 ad69471c pbrook
#undef NEON_FN
591 ad69471c pbrook
592 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
593 b90372ad Peter Maydell
 * intermediate 64 bit accumulator.  */
594 4bd4ee07 Christophe Lyon
uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
595 4bd4ee07 Christophe Lyon
{
596 4bd4ee07 Christophe Lyon
    uint32_t dest;
597 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
598 4bd4ee07 Christophe Lyon
    if (shift >= 32 || shift < -32) {
599 4bd4ee07 Christophe Lyon
        dest = 0;
600 4bd4ee07 Christophe Lyon
    } else if (shift == -32) {
601 4bd4ee07 Christophe Lyon
        dest = val >> 31;
602 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
603 4bd4ee07 Christophe Lyon
        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
604 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
605 4bd4ee07 Christophe Lyon
    } else {
606 4bd4ee07 Christophe Lyon
        dest = val << shift;
607 4bd4ee07 Christophe Lyon
    }
608 4bd4ee07 Christophe Lyon
    return dest;
609 4bd4ee07 Christophe Lyon
}
610 4bd4ee07 Christophe Lyon
611 b90372ad Peter Maydell
/* Handling addition overflow with 64 bit input values is more
612 b90372ad Peter Maydell
 * tricky than with 32 bit values.  */
613 ad69471c pbrook
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
614 ad69471c pbrook
{
615 ad69471c pbrook
    int8_t shift = (uint8_t)shiftop;
616 51e3930f Christophe Lyon
    if (shift >= 64 || shift < -64) {
617 ad69471c pbrook
        val = 0;
618 ad69471c pbrook
    } else if (shift == -64) {
619 ad69471c pbrook
        /* Rounding a 1-bit result just preserves that bit.  */
620 ad69471c pbrook
        val >>= 63;
621 4bd4ee07 Christophe Lyon
    } else if (shift < 0) {
622 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
623 4bd4ee07 Christophe Lyon
        if (val == UINT64_MAX) {
624 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
625 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
626 4bd4ee07 Christophe Lyon
             * result directly.  */
627 4bd4ee07 Christophe Lyon
            val = 0x8000000000000000ULL;
628 4bd4ee07 Christophe Lyon
        } else {
629 4bd4ee07 Christophe Lyon
            val++;
630 4bd4ee07 Christophe Lyon
            val >>= 1;
631 4bd4ee07 Christophe Lyon
        }
632 ad69471c pbrook
    } else {
633 ad69471c pbrook
        val <<= shift;
634 ad69471c pbrook
    }
635 ad69471c pbrook
    return val;
636 ad69471c pbrook
}
637 ad69471c pbrook
638 ad69471c pbrook
/* Unsigned saturating shift, applied to one lane at a time.
 *
 * The shift count is src2 truncated to a signed byte: non-negative counts
 * shift left, negative counts shift right.  A left shift that loses set
 * bits (or any count of at least sizeof(src1) * 8 applied to a non-zero
 * value) stores all-ones and sets the saturation bit through SET_QC().
 * Right shifts of at least sizeof(src1) * 8 store 0.
 *
 * NEON_VOP_ENV is defined elsewhere in this file; presumably it expands
 * NEON_FN once per 8/16/32-bit lane of a 32-bit operand -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) do { \
639 ad69471c pbrook
    int8_t tmp; \
640 ad69471c pbrook
    tmp = (int8_t)src2; \
641 50f67e95 Juha Riihimäki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
642 ad69471c pbrook
        if (src1) { \
643 ad69471c pbrook
            SET_QC(); \
644 ad69471c pbrook
            dest = ~0; \
645 ad69471c pbrook
        } else { \
646 ad69471c pbrook
            dest = 0; \
647 ad69471c pbrook
        } \
648 50f67e95 Juha Riihimäki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
649 ad69471c pbrook
        dest = 0; \
650 ad69471c pbrook
    } else if (tmp < 0) { \
651 ad69471c pbrook
        dest = src1 >> -tmp; \
652 ad69471c pbrook
    } else { \
653 ad69471c pbrook
        dest = src1 << tmp; \
654 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
655 ad69471c pbrook
            SET_QC(); \
656 ad69471c pbrook
            dest = ~0; \
657 ad69471c pbrook
        } \
658 ad69471c pbrook
    }} while (0)
659 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
660 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
661 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_u32, neon_u32, 1)
662 ad69471c pbrook
#undef NEON_FN
663 ad69471c pbrook
664 0ecb72a5 Andreas Färber
/* Unsigned saturating shift of a 64-bit value.
 *
 * The low byte of shiftop, read as signed, is the shift count:
 * non-negative counts shift left, negative counts shift right.  A left
 * shift that drops set bits returns all-ones and sets the saturation bit
 * through SET_QC().  */
uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
{
    const int8_t count = (int8_t)shiftop;
    uint64_t shifted;

    if (count <= -64) {
        /* Every bit falls off the low end. */
        return 0;
    }
    if (count < 0) {
        return val >> -count;
    }
    if (count >= 64) {
        /* Only zero survives an over-wide left shift without saturating. */
        if (val == 0) {
            return 0;
        }
        SET_QC();
        return ~(uint64_t)0;
    }

    shifted = val << count;
    if ((shifted >> count) == val) {
        return shifted;
    }
    SET_QC();
    return ~(uint64_t)0;
}
686 ad69471c pbrook
687 ad69471c pbrook
/* Signed saturating shift, applied to one lane at a time.
 *
 * The shift count is src2 truncated to a signed byte.  When a left shift
 * loses information (or the count is at least sizeof(src1) * 8 and src1
 * is non-zero) SET_QC() is invoked and dest is loaded with
 * 1 << (width - 1), then decremented when src1 is positive.  Right shifts
 * of at least the width store src1 >> 31.
 *
 * NOTE(review): `1 << (sizeof(src1) * 8 - 1)` shifts an int by 31 when
 * src1 is 4 bytes wide, and `src1 << tmp` left-shifts a possibly negative
 * value; ISO C leaves both undefined -- presumably the build relies on
 * compiler-defined behaviour, confirm.
 * NEON_VOP_ENV is defined elsewhere in this file; presumably it expands
 * NEON_FN once per lane -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) do { \
688 ad69471c pbrook
    int8_t tmp; \
689 ad69471c pbrook
    tmp = (int8_t)src2; \
690 50f67e95 Juha Riihimäki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
691 a5d88f3e Peter Maydell
        if (src1) { \
692 ad69471c pbrook
            SET_QC(); \
693 a5d88f3e Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
694 a5d88f3e Peter Maydell
            if (src1 > 0) { \
695 a5d88f3e Peter Maydell
                dest--; \
696 a5d88f3e Peter Maydell
            } \
697 a5d88f3e Peter Maydell
        } else { \
698 a5d88f3e Peter Maydell
            dest = src1; \
699 a5d88f3e Peter Maydell
        } \
700 50f67e95 Juha Riihimäki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
701 ad69471c pbrook
        dest = src1 >> 31; \
702 ad69471c pbrook
    } else if (tmp < 0) { \
703 ad69471c pbrook
        dest = src1 >> -tmp; \
704 ad69471c pbrook
    } else { \
705 ad69471c pbrook
        dest = src1 << tmp; \
706 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
707 ad69471c pbrook
            SET_QC(); \
708 a5d88f3e Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
709 a5d88f3e Peter Maydell
            if (src1 > 0) { \
710 a5d88f3e Peter Maydell
                dest--; \
711 a5d88f3e Peter Maydell
            } \
712 ad69471c pbrook
        } \
713 ad69471c pbrook
    }} while (0)
714 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
715 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
716 02da0b2d Peter Maydell
NEON_VOP_ENV(qshl_s32, neon_s32, 1)
717 ad69471c pbrook
#undef NEON_FN
718 ad69471c pbrook
719 0ecb72a5 Andreas Färber
/* Signed saturating shift of a 64-bit value.
 *
 * The low byte of shiftop, read as signed, is the shift count:
 * non-negative counts shift left, negative counts shift right
 * (arithmetically).  When a left shift cannot represent the result, the
 * saturation bit is set through SET_QC() and the value is clamped to the
 * most positive or most negative 64-bit value according to the sign of
 * the input.  */
uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
{
    /* Signed low byte of the shift operand, as in the sibling helpers. */
    int8_t shift = (int8_t)shiftop;
    int64_t val = valop;
    if (shift >= 64) {
        if (val) {
            SET_QC();
            val = (val >> 63) ^ ~SIGNBIT64;
        }
    } else if (shift <= -64) {
        /* Only copies of the sign bit remain. */
        val >>= 63;
    } else if (shift < 0) {
        val >>= -shift;
    } else {
        int64_t tmp = val;
        /* Shift in the unsigned domain: left-shifting a negative value, or
         * shifting into the sign bit, is undefined for signed types. */
        val = (int64_t)((uint64_t)val << shift);
        if ((val >> shift) != tmp) {
            SET_QC();
            val = (tmp >> 63) ^ ~SIGNBIT64;
        }
    }
    return val;
}
742 ad69471c pbrook
743 4ca4502c Juha Riihimäki
/* Saturating shift taking a signed input and producing an unsigned
 * result, one lane at a time.
 *
 * A lane whose top bit is set (tested with 1 << (sizeof(src1) * 8 - 1))
 * is treated as negative: SET_QC() is invoked and dest becomes 0.  Any
 * other lane goes through the same steps as the unsigned saturating
 * shift: signed byte count from src2, all-ones plus SET_QC() when a left
 * shift loses bits, 0 for right shifts of at least the lane width.
 *
 * NEON_VOP_ENV is defined elsewhere in this file; presumably it expands
 * NEON_FN once per lane -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) do { \
744 4ca4502c Juha Riihimäki
    if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \
745 4ca4502c Juha Riihimäki
        SET_QC(); \
746 4ca4502c Juha Riihimäki
        dest = 0; \
747 4ca4502c Juha Riihimäki
    } else { \
748 4ca4502c Juha Riihimäki
        int8_t tmp; \
749 4ca4502c Juha Riihimäki
        tmp = (int8_t)src2; \
750 4ca4502c Juha Riihimäki
        if (tmp >= (ssize_t)sizeof(src1) * 8) { \
751 4ca4502c Juha Riihimäki
            if (src1) { \
752 4ca4502c Juha Riihimäki
                SET_QC(); \
753 4ca4502c Juha Riihimäki
                dest = ~0; \
754 4ca4502c Juha Riihimäki
            } else { \
755 4ca4502c Juha Riihimäki
                dest = 0; \
756 4ca4502c Juha Riihimäki
            } \
757 4ca4502c Juha Riihimäki
        } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
758 4ca4502c Juha Riihimäki
            dest = 0; \
759 4ca4502c Juha Riihimäki
        } else if (tmp < 0) { \
760 4ca4502c Juha Riihimäki
            dest = src1 >> -tmp; \
761 4ca4502c Juha Riihimäki
        } else { \
762 4ca4502c Juha Riihimäki
            dest = src1 << tmp; \
763 4ca4502c Juha Riihimäki
            if ((dest >> tmp) != src1) { \
764 4ca4502c Juha Riihimäki
                SET_QC(); \
765 4ca4502c Juha Riihimäki
                dest = ~0; \
766 4ca4502c Juha Riihimäki
            } \
767 4ca4502c Juha Riihimäki
        } \
768 4ca4502c Juha Riihimäki
    }} while (0)
769 02da0b2d Peter Maydell
NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
770 02da0b2d Peter Maydell
NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
771 4ca4502c Juha Riihimäki
#undef NEON_FN
772 4ca4502c Juha Riihimäki
773 0ecb72a5 Andreas Färber
/* Saturating shift of a signed 32-bit input to an unsigned result.
 * Negative inputs clamp to 0 and set the saturation bit through SET_QC();
 * everything else is delegated to helper_neon_qshl_u32().  */
uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
{
    const int32_t as_signed = (int32_t)valop;

    if (as_signed >= 0) {
        return helper_neon_qshl_u32(env, valop, shiftop);
    }
    SET_QC();
    return 0;
}
781 4ca4502c Juha Riihimäki
782 0ecb72a5 Andreas Färber
/* Saturating shift of a signed 64-bit input to an unsigned result.
 * Negative inputs clamp to 0 and set the saturation bit through SET_QC();
 * everything else is delegated to helper_neon_qshl_u64().  */
uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
{
    const int64_t as_signed = (int64_t)valop;

    if (as_signed >= 0) {
        return helper_neon_qshl_u64(env, valop, shiftop);
    }
    SET_QC();
    return 0;
}
790 ad69471c pbrook
791 ad69471c pbrook
/* Unsigned saturating rounding shift, one lane at a time.
 *
 * The shift count is src2 truncated to a signed byte.  Left shifts
 * behave like the non-rounding variant: all-ones plus SET_QC() when set
 * bits are lost.  Right shifts add 1 << (-1 - tmp) before shifting, i.e.
 * half of the weight of the last bit kept.  A right shift by exactly the
 * lane width keeps only the former top bit (src1 >> (width - 1)); larger
 * right shifts store 0.
 *
 * NEON_VOP_ENV is defined elsewhere in this file; presumably it expands
 * NEON_FN once per lane -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) do { \
792 ad69471c pbrook
    int8_t tmp; \
793 ad69471c pbrook
    tmp = (int8_t)src2; \
794 33ebc293 Peter Maydell
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
795 33ebc293 Peter Maydell
        if (src1) { \
796 33ebc293 Peter Maydell
            SET_QC(); \
797 33ebc293 Peter Maydell
            dest = ~0; \
798 33ebc293 Peter Maydell
        } else { \
799 33ebc293 Peter Maydell
            dest = 0; \
800 33ebc293 Peter Maydell
        } \
801 33ebc293 Peter Maydell
    } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
802 33ebc293 Peter Maydell
        dest = 0; \
803 33ebc293 Peter Maydell
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
804 33ebc293 Peter Maydell
        dest = src1 >> (sizeof(src1) * 8 - 1); \
805 33ebc293 Peter Maydell
    } else if (tmp < 0) { \
806 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
807 ad69471c pbrook
    } else { \
808 ad69471c pbrook
        dest = src1 << tmp; \
809 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
810 ad69471c pbrook
            SET_QC(); \
811 ad69471c pbrook
            dest = ~0; \
812 ad69471c pbrook
        } \
813 ad69471c pbrook
    }} while (0)
814 02da0b2d Peter Maydell
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
815 02da0b2d Peter Maydell
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
816 ad69471c pbrook
#undef NEON_FN
817 ad69471c pbrook
818 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
819 b90372ad Peter Maydell
 * intermediate 64 bit accumulator.  */
820 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop)
821 4bd4ee07 Christophe Lyon
{
822 4bd4ee07 Christophe Lyon
    uint32_t dest;
823 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
824 33ebc293 Peter Maydell
    if (shift >= 32) {
825 33ebc293 Peter Maydell
        if (val) {
826 33ebc293 Peter Maydell
            SET_QC();
827 33ebc293 Peter Maydell
            dest = ~0;
828 33ebc293 Peter Maydell
        } else {
829 33ebc293 Peter Maydell
            dest = 0;
830 33ebc293 Peter Maydell
        }
831 33ebc293 Peter Maydell
    } else if (shift < -32) {
832 33ebc293 Peter Maydell
        dest = 0;
833 33ebc293 Peter Maydell
    } else if (shift == -32) {
834 33ebc293 Peter Maydell
        dest = val >> 31;
835 33ebc293 Peter Maydell
    } else if (shift < 0) {
836 4bd4ee07 Christophe Lyon
        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
837 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
838 4bd4ee07 Christophe Lyon
    } else {
839 4bd4ee07 Christophe Lyon
        dest = val << shift;
840 4bd4ee07 Christophe Lyon
        if ((dest >> shift) != val) {
841 4bd4ee07 Christophe Lyon
            SET_QC();
842 4bd4ee07 Christophe Lyon
            dest = ~0;
843 4bd4ee07 Christophe Lyon
        }
844 4bd4ee07 Christophe Lyon
    }
845 4bd4ee07 Christophe Lyon
    return dest;
846 4bd4ee07 Christophe Lyon
}
847 4bd4ee07 Christophe Lyon
848 b90372ad Peter Maydell
/* Handling addition overflow with 64 bit input values is more
849 b90372ad Peter Maydell
 * tricky than with 32 bit values.  */
850 0ecb72a5 Andreas Färber
uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
851 ad69471c pbrook
{
852 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
853 33ebc293 Peter Maydell
    if (shift >= 64) {
854 33ebc293 Peter Maydell
        if (val) {
855 33ebc293 Peter Maydell
            SET_QC();
856 33ebc293 Peter Maydell
            val = ~0;
857 33ebc293 Peter Maydell
        }
858 33ebc293 Peter Maydell
    } else if (shift < -64) {
859 33ebc293 Peter Maydell
        val = 0;
860 33ebc293 Peter Maydell
    } else if (shift == -64) {
861 33ebc293 Peter Maydell
        val >>= 63;
862 33ebc293 Peter Maydell
    } else if (shift < 0) {
863 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
864 4bd4ee07 Christophe Lyon
        if (val == UINT64_MAX) {
865 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
866 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
867 4bd4ee07 Christophe Lyon
             * result directly.  */
868 4bd4ee07 Christophe Lyon
            val = 0x8000000000000000ULL;
869 4bd4ee07 Christophe Lyon
        } else {
870 4bd4ee07 Christophe Lyon
            val++;
871 4bd4ee07 Christophe Lyon
            val >>= 1;
872 4bd4ee07 Christophe Lyon
        }
873 ad69471c pbrook
    } else { \
874 ad69471c pbrook
        uint64_t tmp = val;
875 ad69471c pbrook
        val <<= shift;
876 ad69471c pbrook
        if ((val >> shift) != tmp) {
877 ad69471c pbrook
            SET_QC();
878 ad69471c pbrook
            val = ~0;
879 ad69471c pbrook
        }
880 ad69471c pbrook
    }
881 ad69471c pbrook
    return val;
882 ad69471c pbrook
}
883 ad69471c pbrook
884 ad69471c pbrook
/* Signed saturating rounding shift, one lane at a time.
 *
 * The shift count is src2 truncated to a signed byte.  Left shifts that
 * lose information invoke SET_QC() and load dest with 1 << (width - 1),
 * decremented when src1 is positive.  Right shifts add 1 << (-1 - tmp)
 * before shifting; right shifts of at least the lane width store 0.
 *
 * NOTE(review): the first saturation site omits the (uint32_t) cast that
 * the second one has, and `src1 << tmp` may left-shift a negative value,
 * which ISO C leaves undefined -- confirm both are intentional.
 * NEON_VOP_ENV is defined elsewhere in this file; presumably it expands
 * NEON_FN once per lane -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) do { \
885 ad69471c pbrook
    int8_t tmp; \
886 ad69471c pbrook
    tmp = (int8_t)src2; \
887 7b6ecf5b Peter Maydell
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
888 7b6ecf5b Peter Maydell
        if (src1) { \
889 7b6ecf5b Peter Maydell
            SET_QC(); \
890 7b6ecf5b Peter Maydell
            dest = (1 << (sizeof(src1) * 8 - 1)); \
891 7b6ecf5b Peter Maydell
            if (src1 > 0) { \
892 7b6ecf5b Peter Maydell
                dest--; \
893 7b6ecf5b Peter Maydell
            } \
894 7b6ecf5b Peter Maydell
        } else { \
895 7b6ecf5b Peter Maydell
            dest = 0; \
896 7b6ecf5b Peter Maydell
        } \
897 7b6ecf5b Peter Maydell
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
898 7b6ecf5b Peter Maydell
        dest = 0; \
899 7b6ecf5b Peter Maydell
    } else if (tmp < 0) { \
900 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
901 ad69471c pbrook
    } else { \
902 ad69471c pbrook
        dest = src1 << tmp; \
903 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
904 ad69471c pbrook
            SET_QC(); \
905 960e623b Peter Maydell
            dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
906 960e623b Peter Maydell
            if (src1 > 0) { \
907 960e623b Peter Maydell
                dest--; \
908 960e623b Peter Maydell
            } \
909 ad69471c pbrook
        } \
910 ad69471c pbrook
    }} while (0)
911 02da0b2d Peter Maydell
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
912 02da0b2d Peter Maydell
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
913 ad69471c pbrook
#undef NEON_FN
914 ad69471c pbrook
915 4bd4ee07 Christophe Lyon
/* The addition of the rounding constant may overflow, so we use an
916 b90372ad Peter Maydell
 * intermediate 64 bit accumulator.  */
917 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
918 4bd4ee07 Christophe Lyon
{
919 4bd4ee07 Christophe Lyon
    int32_t dest;
920 4bd4ee07 Christophe Lyon
    int32_t val = (int32_t)valop;
921 4bd4ee07 Christophe Lyon
    int8_t shift = (int8_t)shiftop;
922 7b6ecf5b Peter Maydell
    if (shift >= 32) {
923 7b6ecf5b Peter Maydell
        if (val) {
924 7b6ecf5b Peter Maydell
            SET_QC();
925 7b6ecf5b Peter Maydell
            dest = (val >> 31) ^ ~SIGNBIT;
926 7b6ecf5b Peter Maydell
        } else {
927 7b6ecf5b Peter Maydell
            dest = 0;
928 7b6ecf5b Peter Maydell
        }
929 7b6ecf5b Peter Maydell
    } else if (shift <= -32) {
930 7b6ecf5b Peter Maydell
        dest = 0;
931 7b6ecf5b Peter Maydell
    } else if (shift < 0) {
932 4bd4ee07 Christophe Lyon
        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
933 4bd4ee07 Christophe Lyon
        dest = big_dest >> -shift;
934 4bd4ee07 Christophe Lyon
    } else {
935 4bd4ee07 Christophe Lyon
        dest = val << shift;
936 4bd4ee07 Christophe Lyon
        if ((dest >> shift) != val) {
937 4bd4ee07 Christophe Lyon
            SET_QC();
938 4bd4ee07 Christophe Lyon
            dest = (val >> 31) ^ ~SIGNBIT;
939 4bd4ee07 Christophe Lyon
        }
940 4bd4ee07 Christophe Lyon
    }
941 4bd4ee07 Christophe Lyon
    return dest;
942 4bd4ee07 Christophe Lyon
}
943 4bd4ee07 Christophe Lyon
944 b90372ad Peter Maydell
/* Handling addition overflow with 64 bit input values is more
945 b90372ad Peter Maydell
 * tricky than with 32 bit values.  */
946 0ecb72a5 Andreas Färber
uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
947 ad69471c pbrook
{
948 ad69471c pbrook
    int8_t shift = (uint8_t)shiftop;
949 ad69471c pbrook
    int64_t val = valop;
950 ad69471c pbrook
951 7b6ecf5b Peter Maydell
    if (shift >= 64) {
952 7b6ecf5b Peter Maydell
        if (val) {
953 7b6ecf5b Peter Maydell
            SET_QC();
954 7b6ecf5b Peter Maydell
            val = (val >> 63) ^ ~SIGNBIT64;
955 7b6ecf5b Peter Maydell
        }
956 7b6ecf5b Peter Maydell
    } else if (shift <= -64) {
957 7b6ecf5b Peter Maydell
        val = 0;
958 7b6ecf5b Peter Maydell
    } else if (shift < 0) {
959 4bd4ee07 Christophe Lyon
        val >>= (-shift - 1);
960 4bd4ee07 Christophe Lyon
        if (val == INT64_MAX) {
961 4bd4ee07 Christophe Lyon
            /* In this case, it means that the rounding constant is 1,
962 4bd4ee07 Christophe Lyon
             * and the addition would overflow. Return the actual
963 4bd4ee07 Christophe Lyon
             * result directly.  */
964 4bd4ee07 Christophe Lyon
            val = 0x4000000000000000ULL;
965 4bd4ee07 Christophe Lyon
        } else {
966 4bd4ee07 Christophe Lyon
            val++;
967 4bd4ee07 Christophe Lyon
            val >>= 1;
968 4bd4ee07 Christophe Lyon
        }
969 ad69471c pbrook
    } else {
970 4bd4ee07 Christophe Lyon
        int64_t tmp = val;
971 ad69471c pbrook
        val <<= shift;
972 ad69471c pbrook
        if ((val >> shift) != tmp) {
973 ad69471c pbrook
            SET_QC();
974 4bd4ee07 Christophe Lyon
            val = (tmp >> 63) ^ ~SIGNBIT64;
975 ad69471c pbrook
        }
976 ad69471c pbrook
    }
977 ad69471c pbrook
    return val;
978 ad69471c pbrook
}
979 ad69471c pbrook
980 ad69471c pbrook
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
981 ad69471c pbrook
{
982 ad69471c pbrook
    uint32_t mask;
983 ad69471c pbrook
    mask = (a ^ b) & 0x80808080u;
984 ad69471c pbrook
    a &= ~0x80808080u;
985 ad69471c pbrook
    b &= ~0x80808080u;
986 ad69471c pbrook
    return (a + b) ^ mask;
987 ad69471c pbrook
}
988 ad69471c pbrook
989 ad69471c pbrook
uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
990 ad69471c pbrook
{
991 ad69471c pbrook
    uint32_t mask;
992 ad69471c pbrook
    mask = (a ^ b) & 0x80008000u;
993 ad69471c pbrook
    a &= ~0x80008000u;
994 ad69471c pbrook
    b &= ~0x80008000u;
995 ad69471c pbrook
    return (a + b) ^ mask;
996 ad69471c pbrook
}
997 ad69471c pbrook
998 ad69471c pbrook
/* Addition of two elements, instantiated for the 8- and 16-bit padd
 * helpers.  NEON_POP is defined elsewhere in this file; presumably it
 * feeds adjacent element pairs to NEON_FN -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) dest = src1 + src2
999 ad69471c pbrook
NEON_POP(padd_u8, neon_u8, 4)
1000 ad69471c pbrook
NEON_POP(padd_u16, neon_u16, 2)
1001 ad69471c pbrook
#undef NEON_FN
1002 ad69471c pbrook
1003 ad69471c pbrook
/* Element-wise subtraction for 8- and 16-bit lanes.  NEON_VOP is defined
 * elsewhere in this file; presumably it applies NEON_FN to each lane
 * pair -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) dest = src1 - src2
1004 ad69471c pbrook
NEON_VOP(sub_u8, neon_u8, 4)
1005 ad69471c pbrook
NEON_VOP(sub_u16, neon_u16, 2)
1006 ad69471c pbrook
#undef NEON_FN
1007 ad69471c pbrook
1008 ad69471c pbrook
/* Element-wise multiplication for 8- and 16-bit lanes.  */
#define NEON_FN(dest, src1, src2) dest = src1 * src2
1009 ad69471c pbrook
NEON_VOP(mul_u8, neon_u8, 4)
1010 ad69471c pbrook
NEON_VOP(mul_u16, neon_u16, 2)
1011 ad69471c pbrook
#undef NEON_FN
1012 ad69471c pbrook
1013 1654b2d6 aurel32
/* Polynomial multiplication is like integer multiplication except the
1014 ad69471c pbrook
   partial products are XORed, not added.  */
1015 ad69471c pbrook
uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
1016 ad69471c pbrook
{
1017 ad69471c pbrook
    uint32_t mask;
1018 ad69471c pbrook
    uint32_t result;
1019 ad69471c pbrook
    result = 0;
1020 ad69471c pbrook
    while (op1) {
1021 ad69471c pbrook
        mask = 0;
1022 ad69471c pbrook
        if (op1 & 1)
1023 ad69471c pbrook
            mask |= 0xff;
1024 ad69471c pbrook
        if (op1 & (1 << 8))
1025 ad69471c pbrook
            mask |= (0xff << 8);
1026 ad69471c pbrook
        if (op1 & (1 << 16))
1027 ad69471c pbrook
            mask |= (0xff << 16);
1028 ad69471c pbrook
        if (op1 & (1 << 24))
1029 ad69471c pbrook
            mask |= (0xff << 24);
1030 ad69471c pbrook
        result ^= op2 & mask;
1031 ad69471c pbrook
        op1 = (op1 >> 1) & 0x7f7f7f7f;
1032 ad69471c pbrook
        op2 = (op2 << 1) & 0xfefefefe;
1033 ad69471c pbrook
    }
1034 ad69471c pbrook
    return result;
1035 ad69471c pbrook
}
1036 ad69471c pbrook
1037 e5ca24cb Peter Maydell
uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
1038 e5ca24cb Peter Maydell
{
1039 e5ca24cb Peter Maydell
    uint64_t result = 0;
1040 e5ca24cb Peter Maydell
    uint64_t mask;
1041 e5ca24cb Peter Maydell
    uint64_t op2ex = op2;
1042 e5ca24cb Peter Maydell
    op2ex = (op2ex & 0xff) |
1043 e5ca24cb Peter Maydell
        ((op2ex & 0xff00) << 8) |
1044 e5ca24cb Peter Maydell
        ((op2ex & 0xff0000) << 16) |
1045 e5ca24cb Peter Maydell
        ((op2ex & 0xff000000) << 24);
1046 e5ca24cb Peter Maydell
    while (op1) {
1047 e5ca24cb Peter Maydell
        mask = 0;
1048 e5ca24cb Peter Maydell
        if (op1 & 1) {
1049 e5ca24cb Peter Maydell
            mask |= 0xffff;
1050 e5ca24cb Peter Maydell
        }
1051 e5ca24cb Peter Maydell
        if (op1 & (1 << 8)) {
1052 e5ca24cb Peter Maydell
            mask |= (0xffffU << 16);
1053 e5ca24cb Peter Maydell
        }
1054 e5ca24cb Peter Maydell
        if (op1 & (1 << 16)) {
1055 e5ca24cb Peter Maydell
            mask |= (0xffffULL << 32);
1056 e5ca24cb Peter Maydell
        }
1057 e5ca24cb Peter Maydell
        if (op1 & (1 << 24)) {
1058 e5ca24cb Peter Maydell
            mask |= (0xffffULL << 48);
1059 e5ca24cb Peter Maydell
        }
1060 e5ca24cb Peter Maydell
        result ^= op2ex & mask;
1061 e5ca24cb Peter Maydell
        op1 = (op1 >> 1) & 0x7f7f7f7f;
1062 e5ca24cb Peter Maydell
        op2ex <<= 1;
1063 e5ca24cb Peter Maydell
    }
1064 e5ca24cb Peter Maydell
    return result;
1065 e5ca24cb Peter Maydell
}
1066 e5ca24cb Peter Maydell
1067 ad69471c pbrook
/* Bit test: dest is -1 when the two elements share any set bit, else 0.
 * NEON_VOP is defined elsewhere in this file; presumably it applies
 * NEON_FN to each lane pair -- TODO confirm.  */
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
1068 ad69471c pbrook
NEON_VOP(tst_u8, neon_u8, 4)
1069 ad69471c pbrook
NEON_VOP(tst_u16, neon_u16, 2)
1070 ad69471c pbrook
NEON_VOP(tst_u32, neon_u32, 1)
1071 ad69471c pbrook
#undef NEON_FN
1072 ad69471c pbrook
1073 ad69471c pbrook
/* Equality compare: dest is -1 when the two elements are equal, else 0.  */
#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
1074 ad69471c pbrook
NEON_VOP(ceq_u8, neon_u8, 4)
1075 ad69471c pbrook
NEON_VOP(ceq_u16, neon_u16, 2)
1076 ad69471c pbrook
NEON_VOP(ceq_u32, neon_u32, 1)
1077 ad69471c pbrook
#undef NEON_FN
1078 ad69471c pbrook
1079 ad69471c pbrook
/* Absolute value of a signed element; the third macro argument is unused.
 * NEON_VOP1 is defined elsewhere in this file; presumably the one-operand
 * counterpart of NEON_VOP -- TODO confirm.  */
#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
1080 ad69471c pbrook
NEON_VOP1(abs_s8, neon_s8, 4)
1081 ad69471c pbrook
NEON_VOP1(abs_s16, neon_s16, 2)
1082 ad69471c pbrook
#undef NEON_FN
1083 ad69471c pbrook
1084 ad69471c pbrook
/* Count Leading Sign/Zero Bits.  */
1085 ad69471c pbrook
/* Number of leading zero bits in an 8-bit value; 8 when x is zero.  */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    /* Every bit position up to the highest set bit uses up one zero. */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1092 ad69471c pbrook
1093 ad69471c pbrook
/* Number of leading zero bits in a 16-bit value; 16 when x is zero.  */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    /* Every bit position up to the highest set bit uses up one zero. */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
1100 ad69471c pbrook
1101 ad69471c pbrook
/* Count leading zeros of each 8-bit element via do_clz8().  NEON_VOP1 is
 * defined elsewhere in this file; presumably it applies NEON_FN to each
 * lane -- TODO confirm.  */
#define NEON_FN(dest, src, dummy) dest = do_clz8(src)
1102 ad69471c pbrook
NEON_VOP1(clz_u8, neon_u8, 4)
1103 ad69471c pbrook
#undef NEON_FN
1104 ad69471c pbrook
1105 ad69471c pbrook
/* Count leading zeros of each 16-bit element via do_clz16().  */
#define NEON_FN(dest, src, dummy) dest = do_clz16(src)
1106 ad69471c pbrook
NEON_VOP1(clz_u16, neon_u16, 2)
1107 ad69471c pbrook
#undef NEON_FN
1108 ad69471c pbrook
1109 ad69471c pbrook
/* Count leading sign bits of each 8-bit element: negative values are
 * complemented first, and 1 is subtracted from the leading-zero count so
 * the sign bit itself is not counted.  */
#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1
1110 ad69471c pbrook
NEON_VOP1(cls_s8, neon_s8, 4)
1111 ad69471c pbrook
#undef NEON_FN
1112 ad69471c pbrook
1113 ad69471c pbrook
/* Count leading sign bits of each 16-bit element, same scheme as above.  */
#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1
1114 ad69471c pbrook
NEON_VOP1(cls_s16, neon_s16, 2)
1115 ad69471c pbrook
#undef NEON_FN
1116 ad69471c pbrook
1117 ad69471c pbrook
uint32_t HELPER(neon_cls_s32)(uint32_t x)
1118 ad69471c pbrook
{
1119 ad69471c pbrook
    int count;
1120 ad69471c pbrook
    if ((int32_t)x < 0)
1121 ad69471c pbrook
        x = ~x;
1122 ad69471c pbrook
    for (count = 32; x; count--)
1123 ad69471c pbrook
        x = x >> 1;
1124 ad69471c pbrook
    return count - 1;
1125 ad69471c pbrook
}
1126 ad69471c pbrook
1127 ad69471c pbrook
/* Bit count.  */
1128 ad69471c pbrook
uint32_t HELPER(neon_cnt_u8)(uint32_t x)
1129 ad69471c pbrook
{
1130 ad69471c pbrook
    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
1131 ad69471c pbrook
    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
1132 ad69471c pbrook
    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
1133 ad69471c pbrook
    return x;
1134 ad69471c pbrook
}
1135 ad69471c pbrook
1136 86cbc418 Peter Maydell
/* Reverse bits in each 8 bit word */
1137 86cbc418 Peter Maydell
uint32_t HELPER(neon_rbit_u8)(uint32_t x)
1138 86cbc418 Peter Maydell
{
1139 86cbc418 Peter Maydell
    x =  ((x & 0xf0f0f0f0) >> 4)
1140 86cbc418 Peter Maydell
       | ((x & 0x0f0f0f0f) << 4);
1141 86cbc418 Peter Maydell
    x =  ((x & 0x88888888) >> 3)
1142 86cbc418 Peter Maydell
       | ((x & 0x44444444) >> 1)
1143 86cbc418 Peter Maydell
       | ((x & 0x22222222) << 1)
1144 86cbc418 Peter Maydell
       | ((x & 0x11111111) << 3);
1145 86cbc418 Peter Maydell
    return x;
1146 86cbc418 Peter Maydell
}
1147 86cbc418 Peter Maydell
1148 ad69471c pbrook
/* Saturating doubling multiply returning the high half, 16-bit elements.
 *
 * The two operands are multiplied as signed 16-bit values and the 32-bit
 * product is doubled.  If doubling would change the sign bit (bits 31 and
 * 30 of the product differ), SET_QC() is invoked and the value is
 * replaced by (tmp >> 31) ^ ~SIGNBIT instead.  When `round` is non-zero,
 * 1 << 15 is added; if that makes the signed value smaller than before,
 * SET_QC() is invoked and the value becomes SIGNBIT - 1.  dest receives
 * the upper 16 bits.
 *
 * Instantiated twice: without rounding (qdmulh_s16) and with rounding
 * (qrdmulh_s16).  NEON_VOP_ENV is defined elsewhere in this file.  */
#define NEON_QDMULH16(dest, src1, src2, round) do { \
1149 ad69471c pbrook
    uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
1150 ad69471c pbrook
    if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
1151 ad69471c pbrook
        SET_QC(); \
1152 ad69471c pbrook
        tmp = (tmp >> 31) ^ ~SIGNBIT; \
1153 46eece9d Juha Riihimäki
    } else { \
1154 46eece9d Juha Riihimäki
        tmp <<= 1; \
1155 ad69471c pbrook
    } \
1156 ad69471c pbrook
    if (round) { \
1157 ad69471c pbrook
        int32_t old = tmp; \
1158 ad69471c pbrook
        tmp += 1 << 15; \
1159 ad69471c pbrook
        if ((int32_t)tmp < old) { \
1160 ad69471c pbrook
            SET_QC(); \
1161 ad69471c pbrook
            tmp = SIGNBIT - 1; \
1162 ad69471c pbrook
        } \
1163 ad69471c pbrook
    } \
1164 ad69471c pbrook
    dest = tmp >> 16; \
1165 ad69471c pbrook
    } while(0)
1166 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
1167 02da0b2d Peter Maydell
NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
1168 ad69471c pbrook
#undef NEON_FN
1169 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
1170 02da0b2d Peter Maydell
NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
1171 ad69471c pbrook
#undef NEON_FN
1172 ad69471c pbrook
#undef NEON_QDMULH16
1173 ad69471c pbrook
1174 ad69471c pbrook
/* Saturating doubling multiply returning the high half, 32-bit elements.
 *
 * The two operands are multiplied as signed 32-bit values and the 64-bit
 * product is doubled.  If doubling would change the sign bit (bits 63 and
 * 62 of the product differ), SET_QC() is invoked and the value is
 * replaced by (tmp >> 63) ^ ~SIGNBIT64 instead.  When `round` is
 * non-zero, 1 << 31 is added; if that makes the signed value smaller than
 * before, SET_QC() is invoked and the value becomes SIGNBIT64 - 1.  dest
 * receives the upper 32 bits.
 *
 * Instantiated twice: without rounding (qdmulh_s32) and with rounding
 * (qrdmulh_s32).  NEON_VOP_ENV is defined elsewhere in this file.  */
#define NEON_QDMULH32(dest, src1, src2, round) do { \
1175 ad69471c pbrook
    uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \
1176 ad69471c pbrook
    if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \
1177 ad69471c pbrook
        SET_QC(); \
1178 ad69471c pbrook
        tmp = (tmp >> 63) ^ ~SIGNBIT64; \
1179 ad69471c pbrook
    } else { \
1180 ad69471c pbrook
        tmp <<= 1; \
1181 ad69471c pbrook
    } \
1182 ad69471c pbrook
    if (round) { \
1183 ad69471c pbrook
        int64_t old = tmp; \
1184 ad69471c pbrook
        tmp += (int64_t)1 << 31; \
1185 ad69471c pbrook
        if ((int64_t)tmp < old) { \
1186 ad69471c pbrook
            SET_QC(); \
1187 ad69471c pbrook
            tmp = SIGNBIT64 - 1; \
1188 ad69471c pbrook
        } \
1189 ad69471c pbrook
    } \
1190 ad69471c pbrook
    dest = tmp >> 32; \
1191 ad69471c pbrook
    } while(0)
1192 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
1193 02da0b2d Peter Maydell
NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
1194 ad69471c pbrook
#undef NEON_FN
1195 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
1196 02da0b2d Peter Maydell
NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
1197 ad69471c pbrook
#undef NEON_FN
1198 ad69471c pbrook
#undef NEON_QDMULH32
1199 ad69471c pbrook
1200 ad69471c pbrook
uint32_t HELPER(neon_narrow_u8)(uint64_t x)
1201 ad69471c pbrook
{
1202 ad69471c pbrook
    return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
1203 ad69471c pbrook
           | ((x >> 24) & 0xff000000u);
1204 ad69471c pbrook
}
1205 ad69471c pbrook
1206 ad69471c pbrook
uint32_t HELPER(neon_narrow_u16)(uint64_t x)
1207 ad69471c pbrook
{
1208 ad69471c pbrook
    return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
1209 ad69471c pbrook
}
1210 ad69471c pbrook
1211 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u8)(uint64_t x)
1212 ad69471c pbrook
{
1213 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
1214 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
1215 ad69471c pbrook
}
1216 ad69471c pbrook
1217 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u16)(uint64_t x)
1218 ad69471c pbrook
{
1219 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
1220 ad69471c pbrook
}
1221 ad69471c pbrook
1222 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
1223 ad69471c pbrook
{
1224 ad69471c pbrook
    x &= 0xff80ff80ff80ff80ull;
1225 ad69471c pbrook
    x += 0x0080008000800080ull;
1226 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
1227 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
1228 ad69471c pbrook
}
1229 ad69471c pbrook
1230 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
1231 ad69471c pbrook
{
1232 ad69471c pbrook
    x &= 0xffff8000ffff8000ull;
1233 ad69471c pbrook
    x += 0x0000800000008000ull;
1234 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
1235 ad69471c pbrook
}
1236 ad69471c pbrook
1237 0ecb72a5 Andreas Färber
/* Saturating narrow of four signed 16-bit lanes to unsigned 8-bit lanes.
 * Lanes with bit 15 set produce 0 and lanes above 0xff produce 0xff; both
 * cases set the saturation bit through SET_QC().  */
uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const uint16_t lane = x >> bit;
        uint8_t out;

        if (lane & 0x8000) {
            /* Negative input: the result byte stays 0. */
            SET_QC();
            continue;
        }
        if (lane > 0xff) {
            out = 0xff;
            SET_QC();
        } else {
            out = lane;
        }
        packed |= (uint32_t)out << (bit / 2);
    }
    return packed;
}
1263 af1bbf30 Juha Riihimäki
1264 0ecb72a5 Andreas Färber
/* Saturating narrow of four unsigned 16-bit lanes to 8-bit lanes.  Lanes
 * above 0xff produce 0xff and set the saturation bit through SET_QC().  */
uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const uint16_t lane = x >> bit;
        uint8_t out;

        if (lane > 0xff) {
            out = 0xff;
            SET_QC();
        } else {
            out = lane;
        }
        packed |= (uint32_t)out << (bit / 2);
    }
    return packed;
}
1286 ad69471c pbrook
1287 0ecb72a5 Andreas Färber
/* Saturating narrow of four signed 16-bit lanes to signed 8-bit lanes.
 * Lanes that do not fit in a signed byte are replaced by
 * (lane >> 15) ^ 0x7f and set the saturation bit through SET_QC().  */
uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
{
    uint32_t packed = 0;
    int bit;

    for (bit = 0; bit < 64; bit += 16) {
        const int16_t lane = x >> bit;
        uint8_t out;

        if (lane == (int8_t)lane) {
            out = lane;
        } else {
            /* Sign fill XOR 0x7f picks the clamp value by sign. */
            out = (lane >> 15) ^ 0x7f;
            SET_QC();
        }
        packed |= (uint32_t)out << (bit / 2);
    }
    return packed;
}
1309 ad69471c pbrook
1310 0ecb72a5 Andreas Färber
/* Narrow the two 32-bit lanes of x to unsigned 16 bits each, treating the
 * input lanes as signed: a lane with bit 31 set becomes 0, a lane above
 * 0xffff becomes 0xffff.  SET_QC() is invoked whenever a lane is clamped.
 * The low lane is returned in bits [15:0], the high lane in bits [31:16].
 */
uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
{
    uint32_t lanes[2];
    int i;

    lanes[0] = x;
    lanes[1] = x >> 32;
    for (i = 0; i < 2; i++) {
        if (lanes[i] & 0x80000000) {
            lanes[i] = 0;
            SET_QC();
        } else if (lanes[i] > 0xffff) {
            lanes[i] = 0xffff;
            SET_QC();
        }
    }
    return lanes[0] | (lanes[1] << 16);
}
1332 af1bbf30 Juha Riihimäki
1333 0ecb72a5 Andreas Färber
/* Narrow the two unsigned 32-bit lanes of x to 16 bits each with unsigned
 * saturation (clamp to 0xffff, invoking SET_QC() on each clamp).
 * The low lane is returned in bits [15:0], the high lane in bits [31:16].
 */
uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
{
    uint32_t lo = (uint32_t)x;
    uint32_t hi = (uint32_t)(x >> 32);

    if (lo > 0xffff) {
        lo = 0xffff;
        SET_QC();
    }
    if (hi > 0xffff) {
        hi = 0xffff;
        SET_QC();
    }
    return (hi << 16) | lo;
}
1349 ad69471c pbrook
1350 0ecb72a5 Andreas Färber
/* Narrow the two signed 32-bit lanes of x to signed 16 bits each with
 * saturation.  A lane that does not fit in int16_t becomes 0x7fff
 * (positive) or 0x8000 (negative) and SET_QC() is invoked.
 * The low lane is returned in bits [15:0], the high lane in bits [31:16].
 */
uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
{
    int32_t low;
    int32_t high;

    low = x;
    if (low != (int16_t)low) {
        low = (low >> 31) ^ 0x7fff;
        SET_QC();
    }
    high = x >> 32;
    if (high != (int16_t)high) {
        high = (high >> 31) ^ 0x7fff;
        SET_QC();
    }
    /* Shift as unsigned: left-shifting a negative int is undefined
     * behaviour, and 'high' is negative for any negative lane.
     */
    return (uint16_t)low | ((uint32_t)high << 16);
}
1366 ad69471c pbrook
1367 0ecb72a5 Andreas Färber
/* Narrow a 64-bit value to unsigned 32 bits, treating the input as signed:
 * bit 63 set yields 0, anything above 0xffffffff yields 0xffffffff.
 * SET_QC() is invoked whenever the value is clamped.
 */
uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
{
    uint32_t result = x;

    if (x >> 63) {
        result = 0;
        SET_QC();
    } else if (x >> 32) {
        result = 0xffffffffu;
        SET_QC();
    }
    return result;
}
1379 af1bbf30 Juha Riihimäki
1380 0ecb72a5 Andreas Färber
/* Narrow an unsigned 64-bit value to 32 bits with unsigned saturation:
 * values above 0xffffffff yield 0xffffffff and invoke SET_QC().
 */
uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
{
    if ((x >> 32) == 0) {
        return x;
    }
    SET_QC();
    return 0xffffffffu;
}
1388 ad69471c pbrook
1389 0ecb72a5 Andreas Färber
/* Narrow a signed 64-bit value to signed 32 bits with saturation:
 * out-of-range values yield 0x7fffffff (positive) or 0x80000000
 * (negative) and invoke SET_QC().
 */
uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
{
    int64_t wide = (int64_t)x;

    if (wide == (int32_t)x) {
        return x;
    }
    SET_QC();
    /* (wide >> 63) is all-ones for negative input, zero otherwise. */
    return (wide >> 63) ^ 0x7fffffff;
}
1397 ad69471c pbrook
1398 ad69471c pbrook
uint64_t HELPER(neon_widen_u8)(uint32_t x)
1399 ad69471c pbrook
{
1400 ad69471c pbrook
    uint64_t tmp;
1401 ad69471c pbrook
    uint64_t ret;
1402 ad69471c pbrook
    ret = (uint8_t)x;
1403 ad69471c pbrook
    tmp = (uint8_t)(x >> 8);
1404 ad69471c pbrook
    ret |= tmp << 16;
1405 ad69471c pbrook
    tmp = (uint8_t)(x >> 16);
1406 ad69471c pbrook
    ret |= tmp << 32;
1407 ad69471c pbrook
    tmp = (uint8_t)(x >> 24);
1408 ad69471c pbrook
    ret |= tmp << 48;
1409 ad69471c pbrook
    return ret;
1410 ad69471c pbrook
}
1411 ad69471c pbrook
1412 ad69471c pbrook
uint64_t HELPER(neon_widen_s8)(uint32_t x)
1413 ad69471c pbrook
{
1414 ad69471c pbrook
    uint64_t tmp;
1415 ad69471c pbrook
    uint64_t ret;
1416 ad69471c pbrook
    ret = (uint16_t)(int8_t)x;
1417 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 8);
1418 ad69471c pbrook
    ret |= tmp << 16;
1419 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 16);
1420 ad69471c pbrook
    ret |= tmp << 32;
1421 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 24);
1422 ad69471c pbrook
    ret |= tmp << 48;
1423 ad69471c pbrook
    return ret;
1424 ad69471c pbrook
}
1425 ad69471c pbrook
1426 ad69471c pbrook
uint64_t HELPER(neon_widen_u16)(uint32_t x)
1427 ad69471c pbrook
{
1428 ad69471c pbrook
    uint64_t high = (uint16_t)(x >> 16);
1429 ad69471c pbrook
    return ((uint16_t)x) | (high << 32);
1430 ad69471c pbrook
}
1431 ad69471c pbrook
1432 ad69471c pbrook
uint64_t HELPER(neon_widen_s16)(uint32_t x)
1433 ad69471c pbrook
{
1434 ad69471c pbrook
    uint64_t high = (int16_t)(x >> 16);
1435 ad69471c pbrook
    return ((uint32_t)(int16_t)x) | (high << 32);
1436 ad69471c pbrook
}
1437 ad69471c pbrook
1438 ad69471c pbrook
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
1439 ad69471c pbrook
{
1440 ad69471c pbrook
    uint64_t mask;
1441 ad69471c pbrook
    mask = (a ^ b) & 0x8000800080008000ull;
1442 ad69471c pbrook
    a &= ~0x8000800080008000ull;
1443 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1444 ad69471c pbrook
    return (a + b) ^ mask;
1445 ad69471c pbrook
}
1446 ad69471c pbrook
1447 ad69471c pbrook
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
1448 ad69471c pbrook
{
1449 ad69471c pbrook
    uint64_t mask;
1450 ad69471c pbrook
    mask = (a ^ b) & 0x8000000080000000ull;
1451 ad69471c pbrook
    a &= ~0x8000000080000000ull;
1452 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1453 ad69471c pbrook
    return (a + b) ^ mask;
1454 ad69471c pbrook
}
1455 ad69471c pbrook
1456 ad69471c pbrook
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
1457 ad69471c pbrook
{
1458 ad69471c pbrook
    uint64_t tmp;
1459 ad69471c pbrook
    uint64_t tmp2;
1460 ad69471c pbrook
1461 ad69471c pbrook
    tmp = a & 0x0000ffff0000ffffull;
1462 ad69471c pbrook
    tmp += (a >> 16) & 0x0000ffff0000ffffull;
1463 ad69471c pbrook
    tmp2 = b & 0xffff0000ffff0000ull;
1464 ad69471c pbrook
    tmp2 += (b << 16) & 0xffff0000ffff0000ull;
1465 ad69471c pbrook
    return    ( tmp         & 0xffff)
1466 ad69471c pbrook
            | ((tmp  >> 16) & 0xffff0000ull)
1467 ad69471c pbrook
            | ((tmp2 << 16) & 0xffff00000000ull)
1468 ad69471c pbrook
            | ( tmp2        & 0xffff000000000000ull);
1469 ad69471c pbrook
}
1470 ad69471c pbrook
1471 ad69471c pbrook
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
1472 ad69471c pbrook
{
1473 ad69471c pbrook
    uint32_t low = a + (a >> 32);
1474 ad69471c pbrook
    uint32_t high = b + (b >> 32);
1475 ad69471c pbrook
    return low + ((uint64_t)high << 32);
1476 ad69471c pbrook
}
1477 ad69471c pbrook
1478 ad69471c pbrook
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
1479 ad69471c pbrook
{
1480 ad69471c pbrook
    uint64_t mask;
1481 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000800080008000ull;
1482 ad69471c pbrook
    a |= 0x8000800080008000ull;
1483 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1484 ad69471c pbrook
    return (a - b) ^ mask;
1485 ad69471c pbrook
}
1486 ad69471c pbrook
1487 ad69471c pbrook
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
1488 ad69471c pbrook
{
1489 ad69471c pbrook
    uint64_t mask;
1490 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000000080000000ull;
1491 ad69471c pbrook
    a |= 0x8000000080000000ull;
1492 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1493 ad69471c pbrook
    return (a - b) ^ mask;
1494 ad69471c pbrook
}
1495 ad69471c pbrook
1496 0ecb72a5 Andreas Färber
/* Signed saturating add of the two 32-bit lanes of a and b.
 * On signed overflow a lane is replaced by 0x7fffffff or 0x80000000
 * (matching the sign of the first operand) and SET_QC() is invoked.
 */
uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b)
{
    uint32_t sum[2];
    int lane;

    for (lane = 0; lane < 2; lane++) {
        uint32_t x = a >> (lane * 32);
        uint32_t y = b >> (lane * 32);
        uint32_t s = x + y;

        /* Overflow: operands share a sign that the sum does not have. */
        if ((~(x ^ y) & (s ^ x)) & SIGNBIT) {
            SET_QC();
            s = ((int32_t)x >> 31) ^ ~SIGNBIT;
        }
        sum[lane] = s;
    }
    return sum[0] | ((uint64_t)sum[1] << 32);
}
1517 ad69471c pbrook
1518 0ecb72a5 Andreas Färber
/* Signed saturating 64-bit add.  On signed overflow the result is
 * replaced by the largest or smallest int64 value (matching the sign
 * of a) and SET_QC() is invoked.
 */
uint64_t HELPER(neon_addl_saturate_s64)(CPUARMState *env, uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;

    /* Overflow: operands share a sign that the sum does not have. */
    if ((~(a ^ b) & (sum ^ a)) & SIGNBIT64) {
        SET_QC();
        sum = ((int64_t)a >> 63) ^ ~SIGNBIT64;
    }
    return sum;
}
1529 ad69471c pbrook
1530 4d9ad7f7 Peter Maydell
/* Absolute difference of x and y, each first cast to 'intype'.
 * The subtraction is carried out in the wider 'arithtype' because the
 * difference of two 'intype' values may not fit in 'intype' itself
 * (e.g. the difference of two signed 32 bit values can overflow a
 * signed 32 bit value).
 */
#define DO_ABD(dest, x, y, intype, arithtype) do {            \
    arithtype abd_lhs = (intype)(x);                          \
    arithtype abd_rhs = (intype)(y);                          \
    if (abd_lhs > abd_rhs) {                                  \
        dest = abd_lhs - abd_rhs;                             \
    } else {                                                  \
        dest = abd_rhs - abd_lhs;                             \
    }                                                         \
    } while(0)
1539 ad69471c pbrook
1540 ad69471c pbrook
uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
1541 ad69471c pbrook
{
1542 ad69471c pbrook
    uint64_t tmp;
1543 ad69471c pbrook
    uint64_t result;
1544 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, uint8_t, uint32_t);
1545 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
1546 ad69471c pbrook
    result |= tmp << 16;
1547 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
1548 ad69471c pbrook
    result |= tmp << 32;
1549 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
1550 ad69471c pbrook
    result |= tmp << 48;
1551 ad69471c pbrook
    return result;
1552 ad69471c pbrook
}
1553 ad69471c pbrook
1554 ad69471c pbrook
uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
1555 ad69471c pbrook
{
1556 ad69471c pbrook
    uint64_t tmp;
1557 ad69471c pbrook
    uint64_t result;
1558 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, int8_t, int32_t);
1559 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t);
1560 ad69471c pbrook
    result |= tmp << 16;
1561 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t);
1562 ad69471c pbrook
    result |= tmp << 32;
1563 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t);
1564 ad69471c pbrook
    result |= tmp << 48;
1565 ad69471c pbrook
    return result;
1566 ad69471c pbrook
}
1567 ad69471c pbrook
1568 ad69471c pbrook
uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
1569 ad69471c pbrook
{
1570 ad69471c pbrook
    uint64_t tmp;
1571 ad69471c pbrook
    uint64_t result;
1572 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, uint16_t, uint32_t);
1573 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
1574 ad69471c pbrook
    return result | (tmp << 32);
1575 ad69471c pbrook
}
1576 ad69471c pbrook
1577 ad69471c pbrook
uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
1578 ad69471c pbrook
{
1579 ad69471c pbrook
    uint64_t tmp;
1580 ad69471c pbrook
    uint64_t result;
1581 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, int16_t, int32_t);
1582 4d9ad7f7 Peter Maydell
    DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t);
1583 ad69471c pbrook
    return result | (tmp << 32);
1584 ad69471c pbrook
}
1585 ad69471c pbrook
1586 ad69471c pbrook
uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
1587 ad69471c pbrook
{
1588 ad69471c pbrook
    uint64_t result;
1589 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, uint32_t, uint64_t);
1590 ad69471c pbrook
    return result;
1591 ad69471c pbrook
}
1592 ad69471c pbrook
1593 ad69471c pbrook
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
1594 ad69471c pbrook
{
1595 ad69471c pbrook
    uint64_t result;
1596 4d9ad7f7 Peter Maydell
    DO_ABD(result, a, b, int32_t, int64_t);
1597 ad69471c pbrook
    return result;
1598 ad69471c pbrook
}
1599 ad69471c pbrook
#undef DO_ABD
1600 ad69471c pbrook
1601 ad69471c pbrook
/* Widening multiply. Named type is the source type.
 * x and y are truncated to type1 (the source element type), converted to
 * type2 (the result element type) and multiplied; the product truncated
 * to type2 is stored in dest.
 *
 * The leading "1u *" forces the multiplication into an unsigned type at
 * least as wide as unsigned int.  Without it, a uint16_t type2 would be
 * promoted to int and 0xffff * 0xffff would overflow a signed int.
 */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 tmp_x = (x); \
    type1 tmp_y = (y); \
    dest = (type2)(1u * (type2)tmp_x * (type2)tmp_y); \
    } while(0)
1607 ad69471c pbrook
1608 ad69471c pbrook
uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b)
1609 ad69471c pbrook
{
1610 ad69471c pbrook
    uint64_t tmp;
1611 ad69471c pbrook
    uint64_t result;
1612 ad69471c pbrook
1613 ad69471c pbrook
    DO_MULL(result, a, b, uint8_t, uint16_t);
1614 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t);
1615 ad69471c pbrook
    result |= tmp << 16;
1616 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t);
1617 ad69471c pbrook
    result |= tmp << 32;
1618 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t);
1619 ad69471c pbrook
    result |= tmp << 48;
1620 ad69471c pbrook
    return result;
1621 ad69471c pbrook
}
1622 ad69471c pbrook
1623 ad69471c pbrook
uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b)
1624 ad69471c pbrook
{
1625 ad69471c pbrook
    uint64_t tmp;
1626 ad69471c pbrook
    uint64_t result;
1627 ad69471c pbrook
1628 ad69471c pbrook
    DO_MULL(result, a, b, int8_t, uint16_t);
1629 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t);
1630 ad69471c pbrook
    result |= tmp << 16;
1631 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t);
1632 ad69471c pbrook
    result |= tmp << 32;
1633 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t);
1634 ad69471c pbrook
    result |= tmp << 48;
1635 ad69471c pbrook
    return result;
1636 ad69471c pbrook
}
1637 ad69471c pbrook
1638 ad69471c pbrook
uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b)
1639 ad69471c pbrook
{
1640 ad69471c pbrook
    uint64_t tmp;
1641 ad69471c pbrook
    uint64_t result;
1642 ad69471c pbrook
1643 ad69471c pbrook
    DO_MULL(result, a, b, uint16_t, uint32_t);
1644 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
1645 ad69471c pbrook
    return result | (tmp << 32);
1646 ad69471c pbrook
}
1647 ad69471c pbrook
1648 ad69471c pbrook
uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b)
1649 ad69471c pbrook
{
1650 ad69471c pbrook
    uint64_t tmp;
1651 ad69471c pbrook
    uint64_t result;
1652 ad69471c pbrook
1653 ad69471c pbrook
    DO_MULL(result, a, b, int16_t, uint32_t);
1654 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t);
1655 ad69471c pbrook
    return result | (tmp << 32);
1656 ad69471c pbrook
}
1657 ad69471c pbrook
1658 ad69471c pbrook
uint64_t HELPER(neon_negl_u16)(uint64_t x)
1659 ad69471c pbrook
{
1660 ad69471c pbrook
    uint16_t tmp;
1661 ad69471c pbrook
    uint64_t result;
1662 ad69471c pbrook
    result = (uint16_t)-x;
1663 ad69471c pbrook
    tmp = -(x >> 16);
1664 ad69471c pbrook
    result |= (uint64_t)tmp << 16;
1665 ad69471c pbrook
    tmp = -(x >> 32);
1666 ad69471c pbrook
    result |= (uint64_t)tmp << 32;
1667 ad69471c pbrook
    tmp = -(x >> 48);
1668 ad69471c pbrook
    result |= (uint64_t)tmp << 48;
1669 ad69471c pbrook
    return result;
1670 ad69471c pbrook
}
1671 ad69471c pbrook
1672 ad69471c pbrook
uint64_t HELPER(neon_negl_u32)(uint64_t x)
1673 ad69471c pbrook
{
1674 ad69471c pbrook
    uint32_t low = -x;
1675 ad69471c pbrook
    uint32_t high = -(x >> 32);
1676 ad69471c pbrook
    return low | ((uint64_t)high << 32);
1677 ad69471c pbrook
}
1678 ad69471c pbrook
1679 b90372ad Peter Maydell
/* Saturating sign manipulation.  */
1680 ad69471c pbrook
/* TODO: Make these use NEON_VOP1.  */
1681 ad69471c pbrook
#define DO_QABS8(x) do { \
1682 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1683 ad69471c pbrook
        x = 0x7f; \
1684 ad69471c pbrook
        SET_QC(); \
1685 ad69471c pbrook
    } else if (x < 0) { \
1686 ad69471c pbrook
        x = -x; \
1687 ad69471c pbrook
    }} while (0)
1688 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qabs_s8)(CPUARMState *env, uint32_t x)
1689 ad69471c pbrook
{
1690 ad69471c pbrook
    neon_s8 vec;
1691 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1692 ad69471c pbrook
    DO_QABS8(vec.v1);
1693 ad69471c pbrook
    DO_QABS8(vec.v2);
1694 ad69471c pbrook
    DO_QABS8(vec.v3);
1695 ad69471c pbrook
    DO_QABS8(vec.v4);
1696 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1697 ad69471c pbrook
    return x;
1698 ad69471c pbrook
}
1699 ad69471c pbrook
#undef DO_QABS8
1700 ad69471c pbrook
1701 ad69471c pbrook
#define DO_QNEG8(x) do { \
1702 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1703 ad69471c pbrook
        x = 0x7f; \
1704 ad69471c pbrook
        SET_QC(); \
1705 ad69471c pbrook
    } else { \
1706 ad69471c pbrook
        x = -x; \
1707 ad69471c pbrook
    }} while (0)
1708 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qneg_s8)(CPUARMState *env, uint32_t x)
1709 ad69471c pbrook
{
1710 ad69471c pbrook
    neon_s8 vec;
1711 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1712 ad69471c pbrook
    DO_QNEG8(vec.v1);
1713 ad69471c pbrook
    DO_QNEG8(vec.v2);
1714 ad69471c pbrook
    DO_QNEG8(vec.v3);
1715 ad69471c pbrook
    DO_QNEG8(vec.v4);
1716 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1717 ad69471c pbrook
    return x;
1718 ad69471c pbrook
}
1719 ad69471c pbrook
#undef DO_QNEG8
1720 ad69471c pbrook
1721 ad69471c pbrook
#define DO_QABS16(x) do { \
1722 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1723 ad69471c pbrook
        x = 0x7fff; \
1724 ad69471c pbrook
        SET_QC(); \
1725 ad69471c pbrook
    } else if (x < 0) { \
1726 ad69471c pbrook
        x = -x; \
1727 ad69471c pbrook
    }} while (0)
1728 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qabs_s16)(CPUARMState *env, uint32_t x)
1729 ad69471c pbrook
{
1730 ad69471c pbrook
    neon_s16 vec;
1731 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1732 ad69471c pbrook
    DO_QABS16(vec.v1);
1733 ad69471c pbrook
    DO_QABS16(vec.v2);
1734 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1735 ad69471c pbrook
    return x;
1736 ad69471c pbrook
}
1737 ad69471c pbrook
#undef DO_QABS16
1738 ad69471c pbrook
1739 ad69471c pbrook
#define DO_QNEG16(x) do { \
1740 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1741 ad69471c pbrook
        x = 0x7fff; \
1742 ad69471c pbrook
        SET_QC(); \
1743 ad69471c pbrook
    } else { \
1744 ad69471c pbrook
        x = -x; \
1745 ad69471c pbrook
    }} while (0)
1746 0ecb72a5 Andreas Färber
uint32_t HELPER(neon_qneg_s16)(CPUARMState *env, uint32_t x)
1747 ad69471c pbrook
{
1748 ad69471c pbrook
    neon_s16 vec;
1749 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1750 ad69471c pbrook
    DO_QNEG16(vec.v1);
1751 ad69471c pbrook
    DO_QNEG16(vec.v2);
1752 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1753 ad69471c pbrook
    return x;
1754 ad69471c pbrook
}
1755 ad69471c pbrook
#undef DO_QNEG16
1756 ad69471c pbrook
1757 0ecb72a5 Andreas Färber
/* Saturating absolute value of a signed 32-bit value held in x:
 * 0x80000000 yields 0x7fffffff and invokes SET_QC(); other negative
 * values are negated; non-negative values are returned unchanged.
 */
uint32_t HELPER(neon_qabs_s32)(CPUARMState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        return ~SIGNBIT;
    }
    return ((int32_t)x < 0) ? -x : x;
}
1767 ad69471c pbrook
1768 0ecb72a5 Andreas Färber
/* Saturating negation of a signed 32-bit value held in x:
 * 0x80000000 yields 0x7fffffff and invokes SET_QC(); every other
 * value is negated.
 */
uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        return ~SIGNBIT;
    }
    return -x;
}
1778 ad69471c pbrook
1779 ad69471c pbrook
/* NEON Float helpers.  */
1780 aa47cfdd Peter Maydell
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
1781 ad69471c pbrook
{
1782 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1783 51d85267 Peter Maydell
    float32 f0 = make_float32(a);
1784 51d85267 Peter Maydell
    float32 f1 = make_float32(b);
1785 aa47cfdd Peter Maydell
    return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
1786 ad69471c pbrook
}
1787 ad69471c pbrook
1788 cab565c4 Peter Maydell
/* Floating point comparisons produce an integer result.
1789 cab565c4 Peter Maydell
 * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
1790 cab565c4 Peter Maydell
 * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
1791 cab565c4 Peter Maydell
 */
1792 aa47cfdd Peter Maydell
uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp)
1793 cab565c4 Peter Maydell
{
1794 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1795 aa47cfdd Peter Maydell
    return -float32_eq_quiet(make_float32(a), make_float32(b), fpst);
1796 cab565c4 Peter Maydell
}
1797 cab565c4 Peter Maydell
1798 aa47cfdd Peter Maydell
uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp)
1799 cab565c4 Peter Maydell
{
1800 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1801 aa47cfdd Peter Maydell
    return -float32_le(make_float32(b), make_float32(a), fpst);
1802 ad69471c pbrook
}
1803 ad69471c pbrook
1804 aa47cfdd Peter Maydell
uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp)
1805 cab565c4 Peter Maydell
{
1806 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1807 aa47cfdd Peter Maydell
    return -float32_lt(make_float32(b), make_float32(a), fpst);
1808 cab565c4 Peter Maydell
}
1809 ad69471c pbrook
1810 aa47cfdd Peter Maydell
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp)
1811 ad69471c pbrook
{
1812 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1813 51d85267 Peter Maydell
    float32 f0 = float32_abs(make_float32(a));
1814 51d85267 Peter Maydell
    float32 f1 = float32_abs(make_float32(b));
1815 aa47cfdd Peter Maydell
    return -float32_le(f1, f0, fpst);
1816 ad69471c pbrook
}
1817 ad69471c pbrook
1818 aa47cfdd Peter Maydell
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp)
1819 ad69471c pbrook
{
1820 aa47cfdd Peter Maydell
    float_status *fpst = fpstp;
1821 51d85267 Peter Maydell
    float32 f0 = float32_abs(make_float32(a));
1822 51d85267 Peter Maydell
    float32 f1 = float32_abs(make_float32(b));
1823 aa47cfdd Peter Maydell
    return -float32_lt(f1, f0, fpst);
1824 ad69471c pbrook
}
1825 02acedf9 Peter Maydell
1826 057d5f62 Peter Maydell
uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp)
1827 057d5f62 Peter Maydell
{
1828 057d5f62 Peter Maydell
    float_status *fpst = fpstp;
1829 057d5f62 Peter Maydell
    float64 f0 = float64_abs(make_float64(a));
1830 057d5f62 Peter Maydell
    float64 f1 = float64_abs(make_float64(b));
1831 057d5f62 Peter Maydell
    return -float64_le(f1, f0, fpst);
1832 057d5f62 Peter Maydell
}
1833 057d5f62 Peter Maydell
1834 057d5f62 Peter Maydell
uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp)
1835 057d5f62 Peter Maydell
{
1836 057d5f62 Peter Maydell
    float_status *fpst = fpstp;
1837 057d5f62 Peter Maydell
    float64 f0 = float64_abs(make_float64(a));
1838 057d5f62 Peter Maydell
    float64 f1 = float64_abs(make_float64(b));
1839 057d5f62 Peter Maydell
    return -float64_lt(f1, f0, fpst);
1840 057d5f62 Peter Maydell
}
1841 057d5f62 Peter Maydell
1842 02acedf9 Peter Maydell
/* Extract element N (counting from bit 0) of SIZE bits from V. */
#define ELEM(V, N, SIZE) (((V) >> ((SIZE) * (N))) & (~0ull >> (64 - (SIZE))))
1843 02acedf9 Peter Maydell
1844 0ecb72a5 Andreas Färber
/* De-interleave bytes across the register pairs rd/rd+1 and rm/rm+1.
 * Treating regs[rd], regs[rd+1], regs[rm], regs[rm+1] as one 32-byte
 * sequence, the even-numbered bytes are written to rd/rd+1 and the
 * odd-numbered bytes to rm/rm+1.  All inputs are read before any
 * register is written; rm is written before rd.
 */
void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src[4];
    uint64_t even[2] = { 0, 0 };
    uint64_t odd[2] = { 0, 0 };
    int half;
    int lane;

    src[0] = float64_val(env->vfp.regs[rd]);
    src[1] = float64_val(env->vfp.regs[rd + 1]);
    src[2] = float64_val(env->vfp.regs[rm]);
    src[3] = float64_val(env->vfp.regs[rm + 1]);

    for (half = 0; half < 2; half++) {
        for (lane = 0; lane < 8; lane++) {
            /* Each source word supplies four even and four odd bytes. */
            uint64_t word = src[2 * half + lane / 4];
            int pair = (lane % 4) * 2;

            even[half] |= ELEM(word, pair, 8) << (lane * 8);
            odd[half] |= ELEM(word, pair + 1, 8) << (lane * 8);
        }
    }

    env->vfp.regs[rm] = make_float64(odd[0]);
    env->vfp.regs[rm + 1] = make_float64(odd[1]);
    env->vfp.regs[rd] = make_float64(even[0]);
    env->vfp.regs[rd + 1] = make_float64(even[1]);
}
1871 02acedf9 Peter Maydell
1872 0ecb72a5 Andreas Färber
/* De-interleave 16-bit elements across the register pairs rd/rd+1 and
 * rm/rm+1.  Treating regs[rd], regs[rd+1], regs[rm], regs[rm+1] as one
 * sequence of sixteen elements, the even-numbered elements are written
 * to rd/rd+1 and the odd-numbered elements to rm/rm+1.  All inputs are
 * read before any register is written; rm is written before rd.
 */
void HELPER(neon_qunzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src[4];
    uint64_t even[2] = { 0, 0 };
    uint64_t odd[2] = { 0, 0 };
    int half;
    int lane;

    src[0] = float64_val(env->vfp.regs[rd]);
    src[1] = float64_val(env->vfp.regs[rd + 1]);
    src[2] = float64_val(env->vfp.regs[rm]);
    src[3] = float64_val(env->vfp.regs[rm + 1]);

    for (half = 0; half < 2; half++) {
        for (lane = 0; lane < 4; lane++) {
            /* Each source word supplies two even and two odd elements. */
            uint64_t word = src[2 * half + lane / 2];
            int pair = (lane % 2) * 2;

            even[half] |= ELEM(word, pair, 16) << (lane * 16);
            odd[half] |= ELEM(word, pair + 1, 16) << (lane * 16);
        }
    }

    env->vfp.regs[rm] = make_float64(odd[0]);
    env->vfp.regs[rm + 1] = make_float64(odd[1]);
    env->vfp.regs[rd] = make_float64(even[0]);
    env->vfp.regs[rd + 1] = make_float64(even[1]);
}
1891 02acedf9 Peter Maydell
1892 0ecb72a5 Andreas Färber
/* De-interleave 32-bit elements across the register pairs rd/rd+1 and
 * rm/rm+1.  Treating regs[rd], regs[rd+1], regs[rm], regs[rm+1] as one
 * sequence of eight elements, the even-numbered elements are written to
 * rd/rd+1 and the odd-numbered elements to rm/rm+1.  All inputs are read
 * before any register is written; rm is written before rd.
 */
void HELPER(neon_qunzip32)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t low32 = 0xffffffffull;
    uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);

    /* Even elements are the low halves, odd elements the high halves. */
    uint64_t even_d = (d_lo & low32) | (d_hi << 32);
    uint64_t even_m = (m_lo & low32) | (m_hi << 32);
    uint64_t odd_d = (d_lo >> 32) | (d_hi & ~low32);
    uint64_t odd_m = (m_lo >> 32) | (m_hi & ~low32);

    env->vfp.regs[rm] = make_float64(odd_d);
    env->vfp.regs[rm + 1] = make_float64(odd_m);
    env->vfp.regs[rd] = make_float64(even_d);
    env->vfp.regs[rd + 1] = make_float64(even_m);
}
1907 02acedf9 Peter Maydell
1908 0ecb72a5 Andreas Färber
/* De-interleave bytes across the single registers rd and rm.
 * Treating regs[rd] followed by regs[rm] as one 16-byte sequence, the
 * even-numbered bytes are written to rd and the odd-numbered bytes to
 * rm.  Both inputs are read before any write; rm is written before rd.
 */
void HELPER(neon_unzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src[2];
    uint64_t even = 0;
    uint64_t odd = 0;
    int lane;

    src[0] = float64_val(env->vfp.regs[rd]);
    src[1] = float64_val(env->vfp.regs[rm]);

    for (lane = 0; lane < 8; lane++) {
        /* Each source word supplies four even and four odd bytes. */
        uint64_t word = src[lane / 4];
        int pair = (lane % 4) * 2;

        even |= ELEM(word, pair, 8) << (lane * 8);
        odd |= ELEM(word, pair + 1, 8) << (lane * 8);
    }

    env->vfp.regs[rm] = make_float64(odd);
    env->vfp.regs[rd] = make_float64(even);
}
1923 02acedf9 Peter Maydell
1924 0ecb72a5 Andreas Färber
/* De-interleave 16-bit elements across the single registers rd and rm.
 * Treating regs[rd] followed by regs[rm] as one sequence of eight
 * elements, the even-numbered elements are written to rd and the
 * odd-numbered elements to rm.  Both inputs are read before any write;
 * rm is written before rd.
 */
void HELPER(neon_unzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src[2];
    uint64_t even = 0;
    uint64_t odd = 0;
    int lane;

    src[0] = float64_val(env->vfp.regs[rd]);
    src[1] = float64_val(env->vfp.regs[rm]);

    for (lane = 0; lane < 4; lane++) {
        /* Each source word supplies two even and two odd elements. */
        uint64_t word = src[lane / 2];
        int pair = (lane % 2) * 2;

        even |= ELEM(word, pair, 16) << (lane * 16);
        odd |= ELEM(word, pair + 1, 16) << (lane * 16);
    }

    env->vfp.regs[rm] = make_float64(odd);
    env->vfp.regs[rd] = make_float64(even);
}
1935 d68a6f3a Peter Maydell
1936 0ecb72a5 Andreas Färber
/*
 * Interleave the 8-bit lanes of the register pairs {rd, rd + 1} and
 * {rm, rm + 1}.
 *
 * All four 64-bit inputs are read from env->vfp.regs[] before anything is
 * written.  Output lanes alternate "one from the d side, one from the m
 * side":
 *   new rd     <- lanes 0..3 of old rd     and old rm
 *   new rd + 1 <- lanes 4..7 of old rd     and old rm
 *   new rm     <- lanes 0..3 of old rd + 1 and old rm + 1
 *   new rm + 1 <- lanes 4..7 of old rd + 1 and old rm + 1
 *
 * NOTE(review): ELEM() is defined elsewhere in this file; it is assumed
 * to yield lane N (of the given bit width) of a 64-bit value - confirm.
 */
void HELPER(neon_qzip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t in_m0 = float64_val(env->vfp.regs[rm]);
    uint64_t in_m1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t in_d0 = float64_val(env->vfp.regs[rd]);
    uint64_t in_d1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t out_d0 = 0;
    uint64_t out_d1 = 0;
    uint64_t out_m0 = 0;
    uint64_t out_m1 = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int lo_lane = i;                 /* lane within the lower half */
        int hi_lane = i + 4;             /* matching lane in the upper half */
        int d_shift = 16 * i;            /* slot for the d-side byte */
        int m_shift = d_shift + 8;       /* slot for the m-side byte */

        out_d0 |= ELEM(in_d0, lo_lane, 8) << d_shift;
        out_d0 |= ELEM(in_m0, lo_lane, 8) << m_shift;

        out_d1 |= ELEM(in_d0, hi_lane, 8) << d_shift;
        out_d1 |= ELEM(in_m0, hi_lane, 8) << m_shift;

        out_m0 |= ELEM(in_d1, lo_lane, 8) << d_shift;
        out_m0 |= ELEM(in_m1, lo_lane, 8) << m_shift;

        out_m1 |= ELEM(in_d1, hi_lane, 8) << d_shift;
        out_m1 |= ELEM(in_m1, hi_lane, 8) << m_shift;
    }

    /* Store order (rm pair first, then rd pair) is kept as-is. */
    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
1963 d68a6f3a Peter Maydell
1964 0ecb72a5 Andreas Färber
/*
 * Interleave the 16-bit lanes of the register pairs {rd, rd + 1} and
 * {rm, rm + 1}.
 *
 * All four 64-bit inputs are read from env->vfp.regs[] before anything is
 * written.  Output lanes alternate "one from the d side, one from the m
 * side":
 *   new rd     <- lanes 0..1 of old rd     and old rm
 *   new rd + 1 <- lanes 2..3 of old rd     and old rm
 *   new rm     <- lanes 0..1 of old rd + 1 and old rm + 1
 *   new rm + 1 <- lanes 2..3 of old rd + 1 and old rm + 1
 *
 * NOTE(review): ELEM() is defined elsewhere in this file; it is assumed
 * to yield lane N (of the given bit width) of a 64-bit value - confirm.
 */
void HELPER(neon_qzip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t in_m0 = float64_val(env->vfp.regs[rm]);
    uint64_t in_m1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t in_d0 = float64_val(env->vfp.regs[rd]);
    uint64_t in_d1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t out_d0 = 0;
    uint64_t out_d1 = 0;
    uint64_t out_m0 = 0;
    uint64_t out_m1 = 0;
    int i;

    for (i = 0; i < 2; i++) {
        int lo_lane = i;                 /* lane within the lower half */
        int hi_lane = i + 2;             /* matching lane in the upper half */
        int d_shift = 32 * i;            /* slot for the d-side halfword */
        int m_shift = d_shift + 16;      /* slot for the m-side halfword */

        out_d0 |= ELEM(in_d0, lo_lane, 16) << d_shift;
        out_d0 |= ELEM(in_m0, lo_lane, 16) << m_shift;

        out_d1 |= ELEM(in_d0, hi_lane, 16) << d_shift;
        out_d1 |= ELEM(in_m0, hi_lane, 16) << m_shift;

        out_m0 |= ELEM(in_d1, lo_lane, 16) << d_shift;
        out_m0 |= ELEM(in_m1, lo_lane, 16) << m_shift;

        out_m1 |= ELEM(in_d1, hi_lane, 16) << d_shift;
        out_m1 |= ELEM(in_m1, hi_lane, 16) << m_shift;
    }

    /* Store order (rm pair first, then rd pair) is kept as-is. */
    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
1983 d68a6f3a Peter Maydell
1984 0ecb72a5 Andreas Färber
/*
 * Interleave the 32-bit lanes of the register pairs {rd, rd + 1} and
 * {rm, rm + 1}.
 *
 * All four 64-bit inputs are read from env->vfp.regs[] before anything is
 * written.  Each output word has a d-side lane in its low 32 bits and the
 * same-numbered m-side lane in its high 32 bits:
 *   new rd     <- lane 0 of old rd     : lane 0 of old rm
 *   new rd + 1 <- lane 1 of old rd     : lane 1 of old rm
 *   new rm     <- lane 0 of old rd + 1 : lane 0 of old rm + 1
 *   new rm + 1 <- lane 1 of old rd + 1 : lane 1 of old rm + 1
 *
 * NOTE(review): ELEM() is defined elsewhere in this file; it is assumed
 * to yield lane N (of the given bit width) of a 64-bit value - confirm.
 */
void HELPER(neon_qzip32)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t in_m0 = float64_val(env->vfp.regs[rm]);
    uint64_t in_m1 = float64_val(env->vfp.regs[rm + 1]);
    uint64_t in_d0 = float64_val(env->vfp.regs[rd]);
    uint64_t in_d1 = float64_val(env->vfp.regs[rd + 1]);
    uint64_t out_d0;
    uint64_t out_d1;
    uint64_t out_m0;
    uint64_t out_m1;

    /* Low half comes from the d side ... */
    out_d0 = ELEM(in_d0, 0, 32);
    out_d1 = ELEM(in_d0, 1, 32);
    out_m0 = ELEM(in_d1, 0, 32);
    out_m1 = ELEM(in_d1, 1, 32);

    /* ... and the high half from the matching m-side lane. */
    out_d0 |= ELEM(in_m0, 0, 32) << 32;
    out_d1 |= ELEM(in_m0, 1, 32) << 32;
    out_m0 |= ELEM(in_m1, 0, 32) << 32;
    out_m1 |= ELEM(in_m1, 1, 32) << 32;

    /* Store order (rm pair first, then rd pair) is kept as-is. */
    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
1999 d68a6f3a Peter Maydell
2000 0ecb72a5 Andreas Färber
/*
 * Interleave the 8-bit lanes of the 64-bit registers rd and rm.
 *
 * Both registers are read from env->vfp.regs[] first.  Output lanes
 * alternate "one from rd, one from rm": lanes 0..3 of both inputs form the
 * new rd value and lanes 4..7 of both inputs form the new rm value.
 *
 * NOTE(review): ELEM() is defined elsewhere in this file; it is assumed
 * to yield lane N (of the given bit width) of a 64-bit value - confirm.
 */
void HELPER(neon_zip8)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src_m = float64_val(env->vfp.regs[rm]);
    uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t lower = 0;
    uint64_t upper = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int lo_lane = i;                 /* lane within the lower half */
        int hi_lane = i + 4;             /* matching lane in the upper half */
        int d_shift = 16 * i;            /* slot for the rd byte */
        int m_shift = d_shift + 8;       /* slot for the rm byte */

        lower |= ELEM(src_d, lo_lane, 8) << d_shift;
        lower |= ELEM(src_m, lo_lane, 8) << m_shift;
        upper |= ELEM(src_d, hi_lane, 8) << d_shift;
        upper |= ELEM(src_m, hi_lane, 8) << m_shift;
    }

    /* rm is stored before rd, so the rd value survives if rd == rm. */
    env->vfp.regs[rm] = make_float64(upper);
    env->vfp.regs[rd] = make_float64(lower);
}
2015 d68a6f3a Peter Maydell
2016 0ecb72a5 Andreas Färber
/*
 * Interleave the 16-bit lanes of the 64-bit registers rd and rm.
 *
 * Both registers are read from env->vfp.regs[] first.  Output lanes
 * alternate "one from rd, one from rm": lanes 0..1 of both inputs form the
 * new rd value and lanes 2..3 of both inputs form the new rm value.
 *
 * NOTE(review): ELEM() is defined elsewhere in this file; it is assumed
 * to yield lane N (of the given bit width) of a 64-bit value - confirm.
 */
void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
    uint64_t src_m = float64_val(env->vfp.regs[rm]);
    uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t lower = 0;
    uint64_t upper = 0;
    int i;

    for (i = 0; i < 2; i++) {
        int lo_lane = i;                 /* lane within the lower half */
        int hi_lane = i + 2;             /* matching lane in the upper half */
        int d_shift = 32 * i;            /* slot for the rd halfword */
        int m_shift = d_shift + 16;      /* slot for the rm halfword */

        lower |= ELEM(src_d, lo_lane, 16) << d_shift;
        lower |= ELEM(src_m, lo_lane, 16) << m_shift;
        upper |= ELEM(src_d, hi_lane, 16) << d_shift;
        upper |= ELEM(src_m, hi_lane, 16) << m_shift;
    }

    /* rm is stored before rd, so the rd value survives if rd == rm. */
    env->vfp.regs[rm] = make_float64(upper);
    env->vfp.regs[rd] = make_float64(lower);
}