Statistics
| Branch: | Revision:

root / target-arm / neon_helper.c @ a88790a1

History | View | Annotate | Download (34.4 kB)

1 e677137d pbrook
/*
2 e677137d pbrook
 * ARM NEON vector operations.
3 e677137d pbrook
 *
4 e677137d pbrook
 * Copyright (c) 2007, 2008 CodeSourcery.
5 e677137d pbrook
 * Written by Paul Brook
6 e677137d pbrook
 *
7 e677137d pbrook
 * This code is licensed under the GNU GPL v2.
8 e677137d pbrook
 */
9 ad69471c pbrook
#include <stdlib.h>
10 ad69471c pbrook
#include <stdio.h>
11 ad69471c pbrook
12 ad69471c pbrook
#include "cpu.h"
13 ad69471c pbrook
#include "exec-all.h"
14 ad69471c pbrook
#include "helpers.h"
15 ad69471c pbrook
16 ad69471c pbrook
/* Sign-bit masks for 32-bit and 64-bit values.  Fully parenthesized so
   they are safe inside any larger expression.  */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/* Record that a saturating operation saturated.  CPSR_Q is ORed into the
   FPSCR value so that every other FPSCR bit is left untouched and the
   flag stays set once raised.  Requires `env` to be in scope.  */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
20 ad69471c pbrook
21 ad69471c pbrook
/* File-local float_status object; the NFS macro defined just below
   expands to its address.  */
static float_status neon_float_status;
22 ad69471c pbrook
#define NFS &neon_float_status
23 ad69471c pbrook
24 ad69471c pbrook
/* Helper routines to perform bitwise copies between float and int.  */
25 ad69471c pbrook
/* Return the float32 whose bit pattern equals I (no numeric conversion).  */
static inline float32 vfp_itos(uint32_t i)
{
    union {
        float32 as_float;
        uint32_t as_bits;
    } pun;

    pun.as_bits = i;
    return pun.as_float;
}
35 ad69471c pbrook
36 ad69471c pbrook
/* Return the raw 32-bit pattern of S (no numeric conversion).  */
static inline uint32_t vfp_stoi(float32 s)
{
    union {
        float32 as_float;
        uint32_t as_bits;
    } pun;

    pun.as_float = s;
    return pun.as_bits;
}
46 ad69471c pbrook
47 ad69471c pbrook
/* Lane structures overlaid on a 32-bit word (see NEON_UNPACK/NEON_PACK).
   The members are declared in reverse order on big-endian hosts so that
   v1 is always the lane at the opposite end from v2/v4 in the same way
   regardless of host byte order.  */
#ifdef HOST_WORDS_BIGENDIAN
#define NEON_LANES2(type) type v2; type v1;
#define NEON_LANES4(type) type v4; type v3; type v2; type v1;
#else
#define NEON_LANES2(type) type v1; type v2;
#define NEON_LANES4(type) type v1; type v2; type v3; type v4;
#endif

/* One, two or four lanes of TYPE, named neon_<name>.  */
#define NEON_TYPE1(name, type) \
typedef struct \
{ \
    type v1; \
} neon_##name;
#define NEON_TYPE2(name, type) \
typedef struct \
{ \
    NEON_LANES2(type) \
} neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct \
{ \
    NEON_LANES4(type) \
} neon_##name;

NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
#undef NEON_LANES4
#undef NEON_LANES2
93 ad69471c pbrook
94 ad69471c pbrook
/* Reinterpret the 32-bit word VAL as a lane structure of type VTYPE and
   assign it to DEST.  */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun_; \
    pun_.word = (val); \
    dest = pun_.lanes; \
    } while(0)
103 ad69471c pbrook
104 ad69471c pbrook
/* Reinterpret the lane structure VAL (of type VTYPE) as a 32-bit word and
   assign it to DEST.  */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun_; \
    pun_.lanes = (val); \
    dest = pun_.word; \
    } while(0)
113 ad69471c pbrook
114 ad69471c pbrook
/* Apply NEON_FN lane by lane to vsrc1/vsrc2, writing vdest.  Each wider
   variant extends the previous one with the extra lanes.  */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
124 ad69471c pbrook
125 ad69471c pbrook
/* Shared function body for two-operand helpers: unpack arg1 and arg2 into
   lane structures, apply NEON_FN to each of the N lanes, and return the
   repacked 32-bit result.  */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}

/* Define the helper for neon_<name>, taking two packed 32-bit operands.  */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* As NEON_VOP, but the helper also receives `env`, so NEON_FN may use it
   (for example through SET_QC).  */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
145 ad69471c pbrook
146 ad69471c pbrook
/* Pairwise operations.  */
/* For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same.  */
/* NEON_FN is applied to adjacent lanes of the same source: the low half
   of vdest comes from vsrc1 and the high half from vsrc2.  */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);
157 ad69471c pbrook
158 ad69471c pbrook
/* Define the pairwise helper for neon_<name>: unpack both operands, apply
   NEON_FN to adjacent lane pairs via NEON_PDO<n>, and return the packed
   result.  */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
171 ad69471c pbrook
172 ad69471c pbrook
/* Unary operators.  */
/* Define the one-operand helper for neon_<name>.  NEON_DO<n> still passes
   a third argument to NEON_FN, so the NEON_FN in effect must ignore it
   (no vsrc2 is declared here).  */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vsrc1; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
183 ad69471c pbrook
184 ad69471c pbrook
185 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
186 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
187 ad69471c pbrook
    if (tmp != (type)tmp) { \
188 ad69471c pbrook
        SET_QC(); \
189 ad69471c pbrook
        dest = ~0; \
190 ad69471c pbrook
    } else { \
191 ad69471c pbrook
        dest = tmp; \
192 ad69471c pbrook
    }} while(0)
193 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
194 ad69471c pbrook
NEON_VOP_ENV(qadd_u8, neon_u8, 4)
195 ad69471c pbrook
#undef NEON_FN
196 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
197 ad69471c pbrook
NEON_VOP_ENV(qadd_u16, neon_u16, 2)
198 ad69471c pbrook
#undef NEON_FN
199 ad69471c pbrook
#undef NEON_USAT
200 ad69471c pbrook
201 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
202 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
203 ad69471c pbrook
    if (tmp != (type)tmp) { \
204 ad69471c pbrook
        SET_QC(); \
205 ad69471c pbrook
        if (src2 > 0) { \
206 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
207 ad69471c pbrook
        } else { \
208 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
209 ad69471c pbrook
        } \
210 ad69471c pbrook
    } \
211 ad69471c pbrook
    dest = tmp; \
212 ad69471c pbrook
    } while(0)
213 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
214 ad69471c pbrook
NEON_VOP_ENV(qadd_s8, neon_s8, 4)
215 ad69471c pbrook
#undef NEON_FN
216 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
217 ad69471c pbrook
NEON_VOP_ENV(qadd_s16, neon_s16, 2)
218 ad69471c pbrook
#undef NEON_FN
219 ad69471c pbrook
#undef NEON_SSAT
220 ad69471c pbrook
221 ad69471c pbrook
#define NEON_USAT(dest, src1, src2, type) do { \
222 ad69471c pbrook
    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
223 ad69471c pbrook
    if (tmp != (type)tmp) { \
224 ad69471c pbrook
        SET_QC(); \
225 ad69471c pbrook
        dest = 0; \
226 ad69471c pbrook
    } else { \
227 ad69471c pbrook
        dest = tmp; \
228 ad69471c pbrook
    }} while(0)
229 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
230 ad69471c pbrook
NEON_VOP_ENV(qsub_u8, neon_u8, 4)
231 ad69471c pbrook
#undef NEON_FN
232 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
233 ad69471c pbrook
NEON_VOP_ENV(qsub_u16, neon_u16, 2)
234 ad69471c pbrook
#undef NEON_FN
235 ad69471c pbrook
#undef NEON_USAT
236 ad69471c pbrook
237 ad69471c pbrook
#define NEON_SSAT(dest, src1, src2, type) do { \
238 ad69471c pbrook
    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
239 ad69471c pbrook
    if (tmp != (type)tmp) { \
240 ad69471c pbrook
        SET_QC(); \
241 ad69471c pbrook
        if (src2 < 0) { \
242 ad69471c pbrook
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
243 ad69471c pbrook
        } else { \
244 ad69471c pbrook
            tmp = 1 << (sizeof(type) * 8 - 1); \
245 ad69471c pbrook
        } \
246 ad69471c pbrook
    } \
247 ad69471c pbrook
    dest = tmp; \
248 ad69471c pbrook
    } while(0)
249 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
250 ad69471c pbrook
NEON_VOP_ENV(qsub_s8, neon_s8, 4)
251 ad69471c pbrook
#undef NEON_FN
252 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
253 ad69471c pbrook
NEON_VOP_ENV(qsub_s16, neon_s16, 2)
254 ad69471c pbrook
#undef NEON_FN
255 ad69471c pbrook
#undef NEON_SSAT
256 ad69471c pbrook
257 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
258 ad69471c pbrook
NEON_VOP(hadd_s8, neon_s8, 4)
259 ad69471c pbrook
NEON_VOP(hadd_u8, neon_u8, 4)
260 ad69471c pbrook
NEON_VOP(hadd_s16, neon_s16, 2)
261 ad69471c pbrook
NEON_VOP(hadd_u16, neon_u16, 2)
262 ad69471c pbrook
#undef NEON_FN
263 ad69471c pbrook
264 ad69471c pbrook
int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2)
265 ad69471c pbrook
{
266 ad69471c pbrook
    int32_t dest;
267 ad69471c pbrook
268 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
269 ad69471c pbrook
    if (src1 & src2 & 1)
270 ad69471c pbrook
        dest++;
271 ad69471c pbrook
    return dest;
272 ad69471c pbrook
}
273 ad69471c pbrook
274 ad69471c pbrook
uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2)
275 ad69471c pbrook
{
276 ad69471c pbrook
    uint32_t dest;
277 ad69471c pbrook
278 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
279 ad69471c pbrook
    if (src1 & src2 & 1)
280 ad69471c pbrook
        dest++;
281 ad69471c pbrook
    return dest;
282 ad69471c pbrook
}
283 ad69471c pbrook
284 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1
285 ad69471c pbrook
NEON_VOP(rhadd_s8, neon_s8, 4)
286 ad69471c pbrook
NEON_VOP(rhadd_u8, neon_u8, 4)
287 ad69471c pbrook
NEON_VOP(rhadd_s16, neon_s16, 2)
288 ad69471c pbrook
NEON_VOP(rhadd_u16, neon_u16, 2)
289 ad69471c pbrook
#undef NEON_FN
290 ad69471c pbrook
291 ad69471c pbrook
int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2)
292 ad69471c pbrook
{
293 ad69471c pbrook
    int32_t dest;
294 ad69471c pbrook
295 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
296 ad69471c pbrook
    if ((src1 | src2) & 1)
297 ad69471c pbrook
        dest++;
298 ad69471c pbrook
    return dest;
299 ad69471c pbrook
}
300 ad69471c pbrook
301 ad69471c pbrook
uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2)
302 ad69471c pbrook
{
303 ad69471c pbrook
    uint32_t dest;
304 ad69471c pbrook
305 ad69471c pbrook
    dest = (src1 >> 1) + (src2 >> 1);
306 ad69471c pbrook
    if ((src1 | src2) & 1)
307 ad69471c pbrook
        dest++;
308 ad69471c pbrook
    return dest;
309 ad69471c pbrook
}
310 ad69471c pbrook
311 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1
312 ad69471c pbrook
NEON_VOP(hsub_s8, neon_s8, 4)
313 ad69471c pbrook
NEON_VOP(hsub_u8, neon_u8, 4)
314 ad69471c pbrook
NEON_VOP(hsub_s16, neon_s16, 2)
315 ad69471c pbrook
NEON_VOP(hsub_u16, neon_u16, 2)
316 ad69471c pbrook
#undef NEON_FN
317 ad69471c pbrook
318 ad69471c pbrook
int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2)
319 ad69471c pbrook
{
320 ad69471c pbrook
    int32_t dest;
321 ad69471c pbrook
322 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
323 ad69471c pbrook
    if ((~src1) & src2 & 1)
324 ad69471c pbrook
        dest--;
325 ad69471c pbrook
    return dest;
326 ad69471c pbrook
}
327 ad69471c pbrook
328 ad69471c pbrook
uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
329 ad69471c pbrook
{
330 ad69471c pbrook
    uint32_t dest;
331 ad69471c pbrook
332 ad69471c pbrook
    dest = (src1 >> 1) - (src2 >> 1);
333 ad69471c pbrook
    if ((~src1) & src2 & 1)
334 ad69471c pbrook
        dest--;
335 ad69471c pbrook
    return dest;
336 ad69471c pbrook
}
337 ad69471c pbrook
338 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
339 ad69471c pbrook
NEON_VOP(cgt_s8, neon_s8, 4)
340 ad69471c pbrook
NEON_VOP(cgt_u8, neon_u8, 4)
341 ad69471c pbrook
NEON_VOP(cgt_s16, neon_s16, 2)
342 ad69471c pbrook
NEON_VOP(cgt_u16, neon_u16, 2)
343 ad69471c pbrook
NEON_VOP(cgt_s32, neon_s32, 1)
344 ad69471c pbrook
NEON_VOP(cgt_u32, neon_u32, 1)
345 ad69471c pbrook
#undef NEON_FN
346 ad69471c pbrook
347 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
348 ad69471c pbrook
NEON_VOP(cge_s8, neon_s8, 4)
349 ad69471c pbrook
NEON_VOP(cge_u8, neon_u8, 4)
350 ad69471c pbrook
NEON_VOP(cge_s16, neon_s16, 2)
351 ad69471c pbrook
NEON_VOP(cge_u16, neon_u16, 2)
352 ad69471c pbrook
NEON_VOP(cge_s32, neon_s32, 1)
353 ad69471c pbrook
NEON_VOP(cge_u32, neon_u32, 1)
354 ad69471c pbrook
#undef NEON_FN
355 ad69471c pbrook
356 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
357 ad69471c pbrook
NEON_VOP(min_s8, neon_s8, 4)
358 ad69471c pbrook
NEON_VOP(min_u8, neon_u8, 4)
359 ad69471c pbrook
NEON_VOP(min_s16, neon_s16, 2)
360 ad69471c pbrook
NEON_VOP(min_u16, neon_u16, 2)
361 ad69471c pbrook
NEON_VOP(min_s32, neon_s32, 1)
362 ad69471c pbrook
NEON_VOP(min_u32, neon_u32, 1)
363 ad69471c pbrook
NEON_POP(pmin_s8, neon_s8, 4)
364 ad69471c pbrook
NEON_POP(pmin_u8, neon_u8, 4)
365 ad69471c pbrook
NEON_POP(pmin_s16, neon_s16, 2)
366 ad69471c pbrook
NEON_POP(pmin_u16, neon_u16, 2)
367 ad69471c pbrook
#undef NEON_FN
368 ad69471c pbrook
369 ad69471c pbrook
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
370 ad69471c pbrook
NEON_VOP(max_s8, neon_s8, 4)
371 ad69471c pbrook
NEON_VOP(max_u8, neon_u8, 4)
372 ad69471c pbrook
NEON_VOP(max_s16, neon_s16, 2)
373 ad69471c pbrook
NEON_VOP(max_u16, neon_u16, 2)
374 ad69471c pbrook
NEON_VOP(max_s32, neon_s32, 1)
375 ad69471c pbrook
NEON_VOP(max_u32, neon_u32, 1)
376 ad69471c pbrook
NEON_POP(pmax_s8, neon_s8, 4)
377 ad69471c pbrook
NEON_POP(pmax_u8, neon_u8, 4)
378 ad69471c pbrook
NEON_POP(pmax_s16, neon_s16, 2)
379 ad69471c pbrook
NEON_POP(pmax_u16, neon_u16, 2)
380 ad69471c pbrook
#undef NEON_FN
381 ad69471c pbrook
382 ad69471c pbrook
#define NEON_FN(dest, src1, src2) \
383 ad69471c pbrook
    dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
384 ad69471c pbrook
NEON_VOP(abd_s8, neon_s8, 4)
385 ad69471c pbrook
NEON_VOP(abd_u8, neon_u8, 4)
386 ad69471c pbrook
NEON_VOP(abd_s16, neon_s16, 2)
387 ad69471c pbrook
NEON_VOP(abd_u16, neon_u16, 2)
388 ad69471c pbrook
NEON_VOP(abd_s32, neon_s32, 1)
389 ad69471c pbrook
NEON_VOP(abd_u32, neon_u32, 1)
390 ad69471c pbrook
#undef NEON_FN
391 ad69471c pbrook
392 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
393 ad69471c pbrook
    int8_t tmp; \
394 ad69471c pbrook
    tmp = (int8_t)src2; \
395 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
396 50f67e95 Juha Riihimรคki
        tmp <= -(ssize_t)sizeof(src1) * 8) { \
397 ad69471c pbrook
        dest = 0; \
398 ad69471c pbrook
    } else if (tmp < 0) { \
399 ad69471c pbrook
        dest = src1 >> -tmp; \
400 ad69471c pbrook
    } else { \
401 ad69471c pbrook
        dest = src1 << tmp; \
402 ad69471c pbrook
    }} while (0)
403 ad69471c pbrook
NEON_VOP(shl_u8, neon_u8, 4)
404 ad69471c pbrook
NEON_VOP(shl_u16, neon_u16, 2)
405 ad69471c pbrook
NEON_VOP(shl_u32, neon_u32, 1)
406 ad69471c pbrook
#undef NEON_FN
407 ad69471c pbrook
408 ad69471c pbrook
uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
409 ad69471c pbrook
{
410 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
411 ad69471c pbrook
    if (shift >= 64 || shift <= -64) {
412 ad69471c pbrook
        val = 0;
413 ad69471c pbrook
    } else if (shift < 0) {
414 ad69471c pbrook
        val >>= -shift;
415 ad69471c pbrook
    } else {
416 ad69471c pbrook
        val <<= shift;
417 ad69471c pbrook
    }
418 ad69471c pbrook
    return val;
419 ad69471c pbrook
}
420 ad69471c pbrook
421 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
422 ad69471c pbrook
    int8_t tmp; \
423 ad69471c pbrook
    tmp = (int8_t)src2; \
424 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
425 ad69471c pbrook
        dest = 0; \
426 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
427 ad69471c pbrook
        dest = src1 >> (sizeof(src1) * 8 - 1); \
428 ad69471c pbrook
    } else if (tmp < 0) { \
429 ad69471c pbrook
        dest = src1 >> -tmp; \
430 ad69471c pbrook
    } else { \
431 ad69471c pbrook
        dest = src1 << tmp; \
432 ad69471c pbrook
    }} while (0)
433 ad69471c pbrook
NEON_VOP(shl_s8, neon_s8, 4)
434 ad69471c pbrook
NEON_VOP(shl_s16, neon_s16, 2)
435 ad69471c pbrook
NEON_VOP(shl_s32, neon_s32, 1)
436 ad69471c pbrook
#undef NEON_FN
437 ad69471c pbrook
438 ad69471c pbrook
uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
439 ad69471c pbrook
{
440 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
441 ad69471c pbrook
    int64_t val = valop;
442 ad69471c pbrook
    if (shift >= 64) {
443 ad69471c pbrook
        val = 0;
444 ad69471c pbrook
    } else if (shift <= -64) {
445 ad69471c pbrook
        val >>= 63;
446 ad69471c pbrook
    } else if (shift < 0) {
447 ad69471c pbrook
        val >>= -shift;
448 ad69471c pbrook
    } else {
449 ad69471c pbrook
        val <<= shift;
450 ad69471c pbrook
    }
451 ad69471c pbrook
    return val;
452 ad69471c pbrook
}
453 ad69471c pbrook
454 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
455 ad69471c pbrook
    int8_t tmp; \
456 ad69471c pbrook
    tmp = (int8_t)src2; \
457 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
458 ad69471c pbrook
        dest = 0; \
459 50f67e95 Juha Riihimรคki
    } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
460 cb76e138 Paul Brook
        dest = src1 >> (sizeof(src1) * 8 - 1); \
461 50f67e95 Juha Riihimรคki
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
462 ad69471c pbrook
        dest = src1 >> (tmp - 1); \
463 ad69471c pbrook
        dest++; \
464 cb76e138 Paul Brook
        dest >>= 1; \
465 ad69471c pbrook
    } else if (tmp < 0) { \
466 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
467 ad69471c pbrook
    } else { \
468 ad69471c pbrook
        dest = src1 << tmp; \
469 ad69471c pbrook
    }} while (0)
470 ad69471c pbrook
NEON_VOP(rshl_s8, neon_s8, 4)
471 ad69471c pbrook
NEON_VOP(rshl_s16, neon_s16, 2)
472 ad69471c pbrook
NEON_VOP(rshl_s32, neon_s32, 1)
473 ad69471c pbrook
#undef NEON_FN
474 ad69471c pbrook
475 ad69471c pbrook
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
476 ad69471c pbrook
{
477 ad69471c pbrook
    int8_t shift = (int8_t)shiftop;
478 ad69471c pbrook
    int64_t val = valop;
479 ad69471c pbrook
    if (shift >= 64) {
480 ad69471c pbrook
        val = 0;
481 ad69471c pbrook
    } else if (shift < -64) {
482 ad69471c pbrook
        val >>= 63;
483 ad69471c pbrook
    } else if (shift == -63) {
484 ad69471c pbrook
        val >>= 63;
485 ad69471c pbrook
        val++;
486 ad69471c pbrook
        val >>= 1;
487 ad69471c pbrook
    } else if (shift < 0) {
488 ad69471c pbrook
        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
489 ad69471c pbrook
    } else {
490 ad69471c pbrook
        val <<= shift;
491 ad69471c pbrook
    }
492 ad69471c pbrook
    return val;
493 ad69471c pbrook
}
494 ad69471c pbrook
495 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
496 ad69471c pbrook
    int8_t tmp; \
497 ad69471c pbrook
    tmp = (int8_t)src2; \
498 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8 || \
499 50f67e95 Juha Riihimรคki
        tmp < -(ssize_t)sizeof(src1) * 8) { \
500 ad69471c pbrook
        dest = 0; \
501 50f67e95 Juha Riihimรคki
    } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
502 ad69471c pbrook
        dest = src1 >> (tmp - 1); \
503 ad69471c pbrook
    } else if (tmp < 0) { \
504 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
505 ad69471c pbrook
    } else { \
506 ad69471c pbrook
        dest = src1 << tmp; \
507 ad69471c pbrook
    }} while (0)
508 ad69471c pbrook
NEON_VOP(rshl_u8, neon_u8, 4)
509 ad69471c pbrook
NEON_VOP(rshl_u16, neon_u16, 2)
510 ad69471c pbrook
NEON_VOP(rshl_u32, neon_u32, 1)
511 ad69471c pbrook
#undef NEON_FN
512 ad69471c pbrook
513 ad69471c pbrook
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
514 ad69471c pbrook
{
515 ad69471c pbrook
    int8_t shift = (uint8_t)shiftop;
516 ad69471c pbrook
    if (shift >= 64 || shift < 64) {
517 ad69471c pbrook
        val = 0;
518 ad69471c pbrook
    } else if (shift == -64) {
519 ad69471c pbrook
        /* Rounding a 1-bit result just preserves that bit.  */
520 ad69471c pbrook
        val >>= 63;
521 ad69471c pbrook
    } if (shift < 0) {
522 ad69471c pbrook
        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
523 ad69471c pbrook
        val >>= -shift;
524 ad69471c pbrook
    } else {
525 ad69471c pbrook
        val <<= shift;
526 ad69471c pbrook
    }
527 ad69471c pbrook
    return val;
528 ad69471c pbrook
}
529 ad69471c pbrook
530 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
531 ad69471c pbrook
    int8_t tmp; \
532 ad69471c pbrook
    tmp = (int8_t)src2; \
533 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
534 ad69471c pbrook
        if (src1) { \
535 ad69471c pbrook
            SET_QC(); \
536 ad69471c pbrook
            dest = ~0; \
537 ad69471c pbrook
        } else { \
538 ad69471c pbrook
            dest = 0; \
539 ad69471c pbrook
        } \
540 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
541 ad69471c pbrook
        dest = 0; \
542 ad69471c pbrook
    } else if (tmp < 0) { \
543 ad69471c pbrook
        dest = src1 >> -tmp; \
544 ad69471c pbrook
    } else { \
545 ad69471c pbrook
        dest = src1 << tmp; \
546 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
547 ad69471c pbrook
            SET_QC(); \
548 ad69471c pbrook
            dest = ~0; \
549 ad69471c pbrook
        } \
550 ad69471c pbrook
    }} while (0)
551 ad69471c pbrook
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
552 ad69471c pbrook
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
553 ad69471c pbrook
NEON_VOP_ENV(qshl_u32, neon_u32, 1)
554 ad69471c pbrook
#undef NEON_FN
555 ad69471c pbrook
556 ad69471c pbrook
/* Unsigned 64-bit saturating shift by the signed count in the low byte of
   SHIFTOP.  Right shifts never saturate; a left shift that loses set bits
   returns all ones and raises the saturation flag.  */
uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    const uint64_t all_ones = ~(uint64_t)0;
    int8_t shift = (int8_t)shiftop;
    uint64_t shifted;

    if (shift <= -64) {
        return 0;
    }
    if (shift < 0) {
        return val >> -shift;
    }
    if (shift >= 64) {
        /* Every bit is shifted out: only zero survives unsaturated.  */
        if (val == 0) {
            return 0;
        }
        SET_QC();
        return all_ones;
    }

    shifted = val << shift;
    if ((shifted >> shift) != val) {
        SET_QC();
        return all_ones;
    }
    return shifted;
}
580 ad69471c pbrook
581 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
582 ad69471c pbrook
    int8_t tmp; \
583 ad69471c pbrook
    tmp = (int8_t)src2; \
584 50f67e95 Juha Riihimรคki
    if (tmp >= (ssize_t)sizeof(src1) * 8) { \
585 ad69471c pbrook
        if (src1) \
586 ad69471c pbrook
            SET_QC(); \
587 ad69471c pbrook
        dest = src1 >> 31; \
588 50f67e95 Juha Riihimรคki
    } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
589 ad69471c pbrook
        dest = src1 >> 31; \
590 ad69471c pbrook
    } else if (tmp < 0) { \
591 ad69471c pbrook
        dest = src1 >> -tmp; \
592 ad69471c pbrook
    } else { \
593 ad69471c pbrook
        dest = src1 << tmp; \
594 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
595 ad69471c pbrook
            SET_QC(); \
596 ad69471c pbrook
            dest = src2 >> 31; \
597 ad69471c pbrook
        } \
598 ad69471c pbrook
    }} while (0)
599 ad69471c pbrook
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
600 ad69471c pbrook
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
601 ad69471c pbrook
NEON_VOP_ENV(qshl_s32, neon_s32, 1)
602 ad69471c pbrook
#undef NEON_FN
603 ad69471c pbrook
604 ad69471c pbrook
/* Signed 64-bit saturating shift by the signed count in the low byte of
   SHIFTOP.  Right shifts never saturate; right shifts of 64 or more leave
   only copies of the sign bit.  A left shift that cannot be represented
   raises the saturation flag and clamps to the largest (positive input)
   or smallest (negative input) 64-bit signed value.

   The clamp is computed as sign-fill XOR 0x7fff...f: an all-zero fill
   gives the maximum, an all-ones fill gives the minimum.  */
uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    int8_t shift = (int8_t)shiftop;
    int64_t val = valop;

    if (shift >= 64) {
        /* Zero stays zero; anything else overflows.  */
        if (val) {
            SET_QC();
            val = (val >> 63) ^ ~SIGNBIT64;
        }
    } else if (shift <= -64) {
        val >>= 63;
    } else if (shift < 0) {
        val >>= -shift;
    } else {
        int64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = (tmp >> 63) ^ ~SIGNBIT64;
        }
    }
    return val;
}
627 ad69471c pbrook
628 ad69471c pbrook
629 ad69471c pbrook
/* FIXME: This is wrong.  */
630 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
631 ad69471c pbrook
    int8_t tmp; \
632 ad69471c pbrook
    tmp = (int8_t)src2; \
633 ad69471c pbrook
    if (tmp < 0) { \
634 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
635 ad69471c pbrook
    } else { \
636 ad69471c pbrook
        dest = src1 << tmp; \
637 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
638 ad69471c pbrook
            SET_QC(); \
639 ad69471c pbrook
            dest = ~0; \
640 ad69471c pbrook
        } \
641 ad69471c pbrook
    }} while (0)
642 ad69471c pbrook
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
643 ad69471c pbrook
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
644 ad69471c pbrook
NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
645 ad69471c pbrook
#undef NEON_FN
646 ad69471c pbrook
647 ad69471c pbrook
/* Unsigned 64-bit saturating rounding shift by the signed count in the
   low byte of SHIFTOP.

   Right shifts round and never saturate: more than 64 gives 0, exactly
   64 keeps only the rounded top bit, and smaller counts shift by one bit
   less than requested and then halve with round-up so that no 64-bit
   addition can wrap.  Left shifts that lose set bits, including any left
   shift of a non-zero value by 64 or more, return all ones and raise the
   saturation flag.  */
uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    int8_t shift = (int8_t)shiftop;

    if (shift >= 64) {
        if (val) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    } else if (shift < -64) {
        val = 0;
    } else if (shift == -64) {
        /* Rounding a 1-bit result just preserves that bit.  */
        val >>= 63;
    } else if (shift < 0) {
        val >>= (-shift - 1);
        /* (val + 1) >> 1 without the wrap-around at UINT64_MAX.  */
        val = (val >> 1) + (val & 1);
    } else {
        uint64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    }
    return val;
}
662 ad69471c pbrook
663 ad69471c pbrook
#define NEON_FN(dest, src1, src2) do { \
664 ad69471c pbrook
    int8_t tmp; \
665 ad69471c pbrook
    tmp = (int8_t)src2; \
666 ad69471c pbrook
    if (tmp < 0) { \
667 ad69471c pbrook
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
668 ad69471c pbrook
    } else { \
669 ad69471c pbrook
        dest = src1 << tmp; \
670 ad69471c pbrook
        if ((dest >> tmp) != src1) { \
671 ad69471c pbrook
            SET_QC(); \
672 ad69471c pbrook
            dest = src1 >> 31; \
673 ad69471c pbrook
        } \
674 ad69471c pbrook
    }} while (0)
675 ad69471c pbrook
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
676 ad69471c pbrook
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
677 ad69471c pbrook
NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
678 ad69471c pbrook
#undef NEON_FN
679 ad69471c pbrook
680 ad69471c pbrook
/* Signed 64-bit saturating rounding shift by the signed count in the low
   byte of SHIFTOP.

   Right shifts round and never saturate: 64 or more always rounds to 0,
   and smaller counts shift by one bit less than requested and then halve
   with round-up so that no 64-bit addition can overflow.  A left shift
   that cannot be represented raises the saturation flag and clamps to
   the largest (positive input) or smallest (negative input) 64-bit
   signed value, computed as sign-fill XOR 0x7fff...f.  */
uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    int8_t shift = (int8_t)shiftop;
    int64_t val = valop;

    if (shift >= 64) {
        /* Zero stays zero; anything else overflows.  */
        if (val) {
            SET_QC();
            val = (val >> 63) ^ ~SIGNBIT64;
        }
    } else if (shift <= -64) {
        val = 0;
    } else if (shift < 0) {
        val >>= (-shift - 1);
        /* (val + 1) >> 1 without the overflow at INT64_MAX.  */
        val = (val >> 1) + (val & 1);
    } else {
        int64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = (tmp >> 63) ^ ~SIGNBIT64;
        }
    }
    return val;
}
697 ad69471c pbrook
698 ad69471c pbrook
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
699 ad69471c pbrook
{
700 ad69471c pbrook
    uint32_t mask;
701 ad69471c pbrook
    mask = (a ^ b) & 0x80808080u;
702 ad69471c pbrook
    a &= ~0x80808080u;
703 ad69471c pbrook
    b &= ~0x80808080u;
704 ad69471c pbrook
    return (a + b) ^ mask;
705 ad69471c pbrook
}
706 ad69471c pbrook
707 ad69471c pbrook
uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
708 ad69471c pbrook
{
709 ad69471c pbrook
    uint32_t mask;
710 ad69471c pbrook
    mask = (a ^ b) & 0x80008000u;
711 ad69471c pbrook
    a &= ~0x80008000u;
712 ad69471c pbrook
    b &= ~0x80008000u;
713 ad69471c pbrook
    return (a + b) ^ mask;
714 ad69471c pbrook
}
715 ad69471c pbrook
716 ad69471c pbrook
/* Element operation for the padd_u8 and padd_u16 helpers: plain addition.
   NEON_POP is defined outside this chunk.  */
#define NEON_FN(dest, src1, src2) dest = src1 + src2
717 ad69471c pbrook
NEON_POP(padd_u8, neon_u8, 4)
718 ad69471c pbrook
NEON_POP(padd_u16, neon_u16, 2)
719 ad69471c pbrook
#undef NEON_FN
720 ad69471c pbrook
721 ad69471c pbrook
/* Element operation for the sub_u8 and sub_u16 helpers: plain subtraction.
   NEON_VOP is defined outside this chunk.  */
#define NEON_FN(dest, src1, src2) dest = src1 - src2
722 ad69471c pbrook
NEON_VOP(sub_u8, neon_u8, 4)
723 ad69471c pbrook
NEON_VOP(sub_u16, neon_u16, 2)
724 ad69471c pbrook
#undef NEON_FN
725 ad69471c pbrook
726 ad69471c pbrook
/* Element operation for the mul_u8 and mul_u16 helpers: plain
   multiplication.  NEON_VOP is defined outside this chunk.  */
#define NEON_FN(dest, src1, src2) dest = src1 * src2
727 ad69471c pbrook
NEON_VOP(mul_u8, neon_u8, 4)
728 ad69471c pbrook
NEON_VOP(mul_u16, neon_u16, 2)
729 ad69471c pbrook
#undef NEON_FN
730 ad69471c pbrook
731 1654b2d6 aurel32
/* Polynomial multiplication is like integer multiplication except the
732 ad69471c pbrook
   partial products are XORed, not added.  */
733 ad69471c pbrook
uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
734 ad69471c pbrook
{
735 ad69471c pbrook
    uint32_t mask;
736 ad69471c pbrook
    uint32_t result;
737 ad69471c pbrook
    result = 0;
738 ad69471c pbrook
    while (op1) {
739 ad69471c pbrook
        mask = 0;
740 ad69471c pbrook
        if (op1 & 1)
741 ad69471c pbrook
            mask |= 0xff;
742 ad69471c pbrook
        if (op1 & (1 << 8))
743 ad69471c pbrook
            mask |= (0xff << 8);
744 ad69471c pbrook
        if (op1 & (1 << 16))
745 ad69471c pbrook
            mask |= (0xff << 16);
746 ad69471c pbrook
        if (op1 & (1 << 24))
747 ad69471c pbrook
            mask |= (0xff << 24);
748 ad69471c pbrook
        result ^= op2 & mask;
749 ad69471c pbrook
        op1 = (op1 >> 1) & 0x7f7f7f7f;
750 ad69471c pbrook
        op2 = (op2 << 1) & 0xfefefefe;
751 ad69471c pbrook
    }
752 ad69471c pbrook
    return result;
753 ad69471c pbrook
}
754 ad69471c pbrook
755 ad69471c pbrook
/* Element operation for the tst_* helpers: -1 (all ones once stored in an
   unsigned lane) when the operands share any set bit, 0 otherwise.  */
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
756 ad69471c pbrook
NEON_VOP(tst_u8, neon_u8, 4)
757 ad69471c pbrook
NEON_VOP(tst_u16, neon_u16, 2)
758 ad69471c pbrook
NEON_VOP(tst_u32, neon_u32, 1)
759 ad69471c pbrook
#undef NEON_FN
760 ad69471c pbrook
761 ad69471c pbrook
/* Element operation for the ceq_* helpers: -1 when the operands are equal,
   0 otherwise.  */
#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
762 ad69471c pbrook
NEON_VOP(ceq_u8, neon_u8, 4)
763 ad69471c pbrook
NEON_VOP(ceq_u16, neon_u16, 2)
764 ad69471c pbrook
NEON_VOP(ceq_u32, neon_u32, 1)
765 ad69471c pbrook
#undef NEON_FN
766 ad69471c pbrook
767 ad69471c pbrook
/* Element operation for the abs_s8 and abs_s16 helpers: negate negative
   inputs.  No saturation is applied here.  */
#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
768 ad69471c pbrook
NEON_VOP1(abs_s8, neon_s8, 4)
769 ad69471c pbrook
NEON_VOP1(abs_s16, neon_s16, 2)
770 ad69471c pbrook
#undef NEON_FN
771 ad69471c pbrook
772 ad69471c pbrook
/* Count Leading Sign/Zero Bits.  */
773 ad69471c pbrook
/* Number of leading zero bits in an 8-bit value; 8 when x is zero.  */
static inline int do_clz8(uint8_t x)
{
    int leading = 8;

    /* Each remaining significant bit removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        leading--;
    }
    return leading;
}
780 ad69471c pbrook
781 ad69471c pbrook
/* Number of leading zero bits in a 16-bit value; 16 when x is zero.  */
static inline int do_clz16(uint16_t x)
{
    int leading = 16;

    /* Each remaining significant bit removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        leading--;
    }
    return leading;
}
788 ad69471c pbrook
789 ad69471c pbrook
/* clz_u8: count leading zeros of each 8-bit element via do_clz8.  */
#define NEON_FN(dest, src, dummy) dest = do_clz8(src)
790 ad69471c pbrook
NEON_VOP1(clz_u8, neon_u8, 4)
791 ad69471c pbrook
#undef NEON_FN
792 ad69471c pbrook
793 ad69471c pbrook
/* clz_u16: count leading zeros of each 16-bit element via do_clz16.  */
#define NEON_FN(dest, src, dummy) dest = do_clz16(src)
794 ad69471c pbrook
NEON_VOP1(clz_u16, neon_u16, 2)
795 ad69471c pbrook
#undef NEON_FN
796 ad69471c pbrook
797 ad69471c pbrook
/* cls_s8: count leading sign bits of each 8-bit element.  Negative inputs
   are complemented so the sign run becomes a zero run; the sign bit itself
   is excluded by the final "- 1".  */
#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1
798 ad69471c pbrook
NEON_VOP1(cls_s8, neon_s8, 4)
799 ad69471c pbrook
#undef NEON_FN
800 ad69471c pbrook
801 ad69471c pbrook
/* cls_s16: count leading sign bits of each 16-bit element.  Negative inputs
   are complemented so the sign run becomes a zero run; the sign bit itself
   is excluded by the final "- 1".  */
#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1
802 ad69471c pbrook
NEON_VOP1(cls_s16, neon_s16, 2)
803 ad69471c pbrook
#undef NEON_FN
804 ad69471c pbrook
805 ad69471c pbrook
uint32_t HELPER(neon_cls_s32)(uint32_t x)
806 ad69471c pbrook
{
807 ad69471c pbrook
    int count;
808 ad69471c pbrook
    if ((int32_t)x < 0)
809 ad69471c pbrook
        x = ~x;
810 ad69471c pbrook
    for (count = 32; x; count--)
811 ad69471c pbrook
        x = x >> 1;
812 ad69471c pbrook
    return count - 1;
813 ad69471c pbrook
}
814 ad69471c pbrook
815 ad69471c pbrook
/* Bit count.  */
816 ad69471c pbrook
uint32_t HELPER(neon_cnt_u8)(uint32_t x)
817 ad69471c pbrook
{
818 ad69471c pbrook
    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
819 ad69471c pbrook
    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
820 ad69471c pbrook
    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
821 ad69471c pbrook
    return x;
822 ad69471c pbrook
}
823 ad69471c pbrook
824 ad69471c pbrook
/*
 * Signed saturating doubling multiply returning the high half, for one
 * 16-bit element.  When "round" is non-zero, 1 << 15 is added before the
 * high half is taken.  SET_QC() is invoked on saturation.
 *
 * The doubling must be skipped once the product has been saturated;
 * otherwise the saturated value 0x7fffffff would itself be shifted to
 * 0xfffffffe and the result would be -1 instead of 0x7fff.
 */
#define NEON_QDMULH16(dest, src1, src2, round) do { \
    uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
    if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
        SET_QC(); \
        tmp = (tmp >> 31) ^ ~SIGNBIT; \
    } else { \
        tmp <<= 1; \
    } \
    if (round) { \
        int32_t old = tmp; \
        tmp += 1 << 15; \
        if ((int32_t)tmp < old) { \
            SET_QC(); \
            tmp = SIGNBIT - 1; \
        } \
    } \
    dest = tmp >> 16; \
    } while(0)
841 ad69471c pbrook
/* Instantiate the 16-bit helpers: qdmulh_s16 without rounding (round = 0)
   and qrdmulh_s16 with rounding (round = 1).  */
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
842 ad69471c pbrook
NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
843 ad69471c pbrook
#undef NEON_FN
844 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
845 ad69471c pbrook
NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
846 ad69471c pbrook
#undef NEON_FN
847 ad69471c pbrook
#undef NEON_QDMULH16
848 ad69471c pbrook
849 ad69471c pbrook
/*
 * Signed saturating doubling multiply returning the high half, for one
 * 32-bit element.  The 64-bit product is doubled unless doubling would
 * change its sign, in which case it is saturated and SET_QC() is invoked.
 * When "round" is non-zero, 1 << 31 is added (again saturating on
 * overflow) before the high 32 bits are stored in dest.
 */
#define NEON_QDMULH32(dest, src1, src2, round) do { \
850 ad69471c pbrook
    uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \
851 ad69471c pbrook
    if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \
852 ad69471c pbrook
        SET_QC(); \
853 ad69471c pbrook
        tmp = (tmp >> 63) ^ ~SIGNBIT64; \
854 ad69471c pbrook
    } else { \
855 ad69471c pbrook
        tmp <<= 1; \
856 ad69471c pbrook
    } \
857 ad69471c pbrook
    if (round) { \
858 ad69471c pbrook
        int64_t old = tmp; \
859 ad69471c pbrook
        tmp += (int64_t)1 << 31; \
860 ad69471c pbrook
        if ((int64_t)tmp < old) { \
861 ad69471c pbrook
            SET_QC(); \
862 ad69471c pbrook
            tmp = SIGNBIT64 - 1; \
863 ad69471c pbrook
        } \
864 ad69471c pbrook
    } \
865 ad69471c pbrook
    dest = tmp >> 32; \
866 ad69471c pbrook
    } while(0)
867 ad69471c pbrook
/* Instantiate the 32-bit helpers: qdmulh_s32 without rounding (round = 0)
   and qrdmulh_s32 with rounding (round = 1).  */
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
868 ad69471c pbrook
NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
869 ad69471c pbrook
#undef NEON_FN
870 ad69471c pbrook
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
871 ad69471c pbrook
NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
872 ad69471c pbrook
#undef NEON_FN
873 ad69471c pbrook
#undef NEON_QDMULH32
874 ad69471c pbrook
875 ad69471c pbrook
uint32_t HELPER(neon_narrow_u8)(uint64_t x)
876 ad69471c pbrook
{
877 ad69471c pbrook
    return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
878 ad69471c pbrook
           | ((x >> 24) & 0xff000000u);
879 ad69471c pbrook
}
880 ad69471c pbrook
881 ad69471c pbrook
uint32_t HELPER(neon_narrow_u16)(uint64_t x)
882 ad69471c pbrook
{
883 ad69471c pbrook
    return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
884 ad69471c pbrook
}
885 ad69471c pbrook
886 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u8)(uint64_t x)
887 ad69471c pbrook
{
888 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
889 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
890 ad69471c pbrook
}
891 ad69471c pbrook
892 ad69471c pbrook
uint32_t HELPER(neon_narrow_high_u16)(uint64_t x)
893 ad69471c pbrook
{
894 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
895 ad69471c pbrook
}
896 ad69471c pbrook
897 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
898 ad69471c pbrook
{
899 ad69471c pbrook
    x &= 0xff80ff80ff80ff80ull;
900 ad69471c pbrook
    x += 0x0080008000800080ull;
901 ad69471c pbrook
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
902 ad69471c pbrook
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
903 ad69471c pbrook
}
904 ad69471c pbrook
905 ad69471c pbrook
uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
906 ad69471c pbrook
{
907 ad69471c pbrook
    x &= 0xffff8000ffff8000ull;
908 ad69471c pbrook
    x += 0x0000800000008000ull;
909 ad69471c pbrook
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
910 ad69471c pbrook
}
911 ad69471c pbrook
912 ad69471c pbrook
/* Narrow four unsigned 16-bit lanes to 8 bits each, clamping to 0xff and
   invoking SET_QC() for every lane that does not fit.  */
uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
{
    uint32_t res = 0;
    int n;

    for (n = 0; n < 64; n += 16) {
        uint16_t lane = x >> n;
        uint8_t out;

        if (lane > 0xff) {
            out = 0xff;
            SET_QC();
        } else {
            out = lane;
        }
        /* Source lanes are 16 bits apart, result lanes 8 bits apart.  */
        res |= (uint32_t)out << (n / 2);
    }
    return res;
}
934 ad69471c pbrook
935 ad69471c pbrook
/* Narrow four signed 16-bit lanes to 8 bits each, clamping to the int8_t
   range and invoking SET_QC() for every lane that does not fit.  */
uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
{
    uint32_t res = 0;
    int n;

    for (n = 0; n < 64; n += 16) {
        int16_t lane = x >> n;
        uint8_t out;

        if (lane != (int8_t)lane) {
            /* 0x7f for positive overflow, 0x80 for negative overflow.  */
            out = (lane >> 15) ^ 0x7f;
            SET_QC();
        } else {
            out = lane;
        }
        /* Source lanes are 16 bits apart, result lanes 8 bits apart.  */
        res |= (uint32_t)out << (n / 2);
    }
    return res;
}
957 ad69471c pbrook
958 ad69471c pbrook
/* Narrow two unsigned 32-bit lanes to 16 bits each, clamping to 0xffff and
   invoking SET_QC() for every lane that does not fit.  */
uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
{
    uint32_t low = x;
    uint32_t high = x >> 32;

    if (low > 0xffff) {
        SET_QC();
        low = 0xffff;
    }
    if (high > 0xffff) {
        SET_QC();
        high = 0xffff;
    }
    return (high << 16) | low;
}
974 ad69471c pbrook
975 ad69471c pbrook
/* Narrow two signed 32-bit lanes to 16 bits each, clamping to the int16_t
   range and invoking SET_QC() for every lane that does not fit.  */
uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
{
    int32_t low;
    int32_t high;

    low = x;
    if (low != (int16_t)low) {
        /* 0x7fff for positive overflow, 0x8000 for negative overflow.  */
        low = (low >> 31) ^ 0x7fff;
        SET_QC();
    }
    high = x >> 32;
    if (high != (int16_t)high) {
        high = (high >> 31) ^ 0x7fff;
        SET_QC();
    }
    /* Shift as unsigned: left-shifting a negative int is undefined.  */
    return (uint16_t)low | ((uint32_t)high << 16);
}
991 ad69471c pbrook
992 ad69471c pbrook
/* Narrow an unsigned 64-bit value to 32 bits, clamping to 0xffffffff and
   invoking SET_QC() when it does not fit.  */
uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
{
    if (x <= 0xffffffffu) {
        return x;
    }
    SET_QC();
    return 0xffffffffu;
}
1000 ad69471c pbrook
1001 ad69471c pbrook
/* Narrow a signed 64-bit value to 32 bits, clamping to the int32_t range
   and invoking SET_QC() when it does not fit.  */
uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x)
{
    if ((int64_t)x != (int32_t)x) {
        SET_QC();
        /* The shift must be arithmetic so that a negative input yields
           all ones and the XOR produces 0x80000000; an unsigned shift
           would give 0x7ffffffe instead.  */
        return ((int64_t)x >> 63) ^ 0x7fffffff;
    }
    return x;
}
1009 ad69471c pbrook
1010 ad69471c pbrook
uint64_t HELPER(neon_widen_u8)(uint32_t x)
1011 ad69471c pbrook
{
1012 ad69471c pbrook
    uint64_t tmp;
1013 ad69471c pbrook
    uint64_t ret;
1014 ad69471c pbrook
    ret = (uint8_t)x;
1015 ad69471c pbrook
    tmp = (uint8_t)(x >> 8);
1016 ad69471c pbrook
    ret |= tmp << 16;
1017 ad69471c pbrook
    tmp = (uint8_t)(x >> 16);
1018 ad69471c pbrook
    ret |= tmp << 32;
1019 ad69471c pbrook
    tmp = (uint8_t)(x >> 24);
1020 ad69471c pbrook
    ret |= tmp << 48;
1021 ad69471c pbrook
    return ret;
1022 ad69471c pbrook
}
1023 ad69471c pbrook
1024 ad69471c pbrook
uint64_t HELPER(neon_widen_s8)(uint32_t x)
1025 ad69471c pbrook
{
1026 ad69471c pbrook
    uint64_t tmp;
1027 ad69471c pbrook
    uint64_t ret;
1028 ad69471c pbrook
    ret = (uint16_t)(int8_t)x;
1029 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 8);
1030 ad69471c pbrook
    ret |= tmp << 16;
1031 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 16);
1032 ad69471c pbrook
    ret |= tmp << 32;
1033 ad69471c pbrook
    tmp = (uint16_t)(int8_t)(x >> 24);
1034 ad69471c pbrook
    ret |= tmp << 48;
1035 ad69471c pbrook
    return ret;
1036 ad69471c pbrook
}
1037 ad69471c pbrook
1038 ad69471c pbrook
uint64_t HELPER(neon_widen_u16)(uint32_t x)
1039 ad69471c pbrook
{
1040 ad69471c pbrook
    uint64_t high = (uint16_t)(x >> 16);
1041 ad69471c pbrook
    return ((uint16_t)x) | (high << 32);
1042 ad69471c pbrook
}
1043 ad69471c pbrook
1044 ad69471c pbrook
uint64_t HELPER(neon_widen_s16)(uint32_t x)
1045 ad69471c pbrook
{
1046 ad69471c pbrook
    uint64_t high = (int16_t)(x >> 16);
1047 ad69471c pbrook
    return ((uint32_t)(int16_t)x) | (high << 32);
1048 ad69471c pbrook
}
1049 ad69471c pbrook
1050 ad69471c pbrook
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
1051 ad69471c pbrook
{
1052 ad69471c pbrook
    uint64_t mask;
1053 ad69471c pbrook
    mask = (a ^ b) & 0x8000800080008000ull;
1054 ad69471c pbrook
    a &= ~0x8000800080008000ull;
1055 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1056 ad69471c pbrook
    return (a + b) ^ mask;
1057 ad69471c pbrook
}
1058 ad69471c pbrook
1059 ad69471c pbrook
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
1060 ad69471c pbrook
{
1061 ad69471c pbrook
    uint64_t mask;
1062 ad69471c pbrook
    mask = (a ^ b) & 0x8000000080000000ull;
1063 ad69471c pbrook
    a &= ~0x8000000080000000ull;
1064 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1065 ad69471c pbrook
    return (a + b) ^ mask;
1066 ad69471c pbrook
}
1067 ad69471c pbrook
1068 ad69471c pbrook
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
1069 ad69471c pbrook
{
1070 ad69471c pbrook
    uint64_t tmp;
1071 ad69471c pbrook
    uint64_t tmp2;
1072 ad69471c pbrook
1073 ad69471c pbrook
    tmp = a & 0x0000ffff0000ffffull;
1074 ad69471c pbrook
    tmp += (a >> 16) & 0x0000ffff0000ffffull;
1075 ad69471c pbrook
    tmp2 = b & 0xffff0000ffff0000ull;
1076 ad69471c pbrook
    tmp2 += (b << 16) & 0xffff0000ffff0000ull;
1077 ad69471c pbrook
    return    ( tmp         & 0xffff)
1078 ad69471c pbrook
            | ((tmp  >> 16) & 0xffff0000ull)
1079 ad69471c pbrook
            | ((tmp2 << 16) & 0xffff00000000ull)
1080 ad69471c pbrook
            | ( tmp2        & 0xffff000000000000ull);
1081 ad69471c pbrook
}
1082 ad69471c pbrook
1083 ad69471c pbrook
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
1084 ad69471c pbrook
{
1085 ad69471c pbrook
    uint32_t low = a + (a >> 32);
1086 ad69471c pbrook
    uint32_t high = b + (b >> 32);
1087 ad69471c pbrook
    return low + ((uint64_t)high << 32);
1088 ad69471c pbrook
}
1089 ad69471c pbrook
1090 ad69471c pbrook
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
1091 ad69471c pbrook
{
1092 ad69471c pbrook
    uint64_t mask;
1093 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000800080008000ull;
1094 ad69471c pbrook
    a |= 0x8000800080008000ull;
1095 ad69471c pbrook
    b &= ~0x8000800080008000ull;
1096 ad69471c pbrook
    return (a - b) ^ mask;
1097 ad69471c pbrook
}
1098 ad69471c pbrook
1099 ad69471c pbrook
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
1100 ad69471c pbrook
{
1101 ad69471c pbrook
    uint64_t mask;
1102 ad69471c pbrook
    mask = (a ^ ~b) & 0x8000000080000000ull;
1103 ad69471c pbrook
    a |= 0x8000000080000000ull;
1104 ad69471c pbrook
    b &= ~0x8000000080000000ull;
1105 ad69471c pbrook
    return (a - b) ^ mask;
1106 ad69471c pbrook
}
1107 ad69471c pbrook
1108 ad69471c pbrook
/* Signed saturating add of two packed 32-bit lanes.  SET_QC() is invoked
   for every lane that saturates.  */
uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
{
    uint32_t a_lo = a;
    uint32_t b_lo = b;
    uint32_t a_hi = a >> 32;
    uint32_t b_hi = b >> 32;
    uint32_t sum_lo = a_lo + b_lo;
    uint32_t sum_hi = a_hi + b_hi;

    /* Overflow: the operands share a sign that the sum does not have.  */
    if (((sum_lo ^ a_lo) & SIGNBIT) && !((a_lo ^ b_lo) & SIGNBIT)) {
        SET_QC();
        /* INT32_MAX for positive operands, INT32_MIN for negative ones.  */
        sum_lo = ((int32_t)a_lo >> 31) ^ ~SIGNBIT;
    }
    if (((sum_hi ^ a_hi) & SIGNBIT) && !((a_hi ^ b_hi) & SIGNBIT)) {
        SET_QC();
        sum_hi = ((int32_t)a_hi >> 31) ^ ~SIGNBIT;
    }
    return ((uint64_t)sum_hi << 32) | sum_lo;
}
1129 ad69471c pbrook
1130 ad69471c pbrook
/* Signed saturating 64-bit add.  SET_QC() is invoked on saturation.  */
uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;

    /* No overflow unless the operands share a sign that the sum lacks.  */
    if (!((sum ^ a) & SIGNBIT64) || ((a ^ b) & SIGNBIT64)) {
        return sum;
    }
    SET_QC();
    /* INT64_MAX for positive operands, INT64_MIN for negative ones.  */
    return ((int64_t)a >> 63) ^ ~SIGNBIT64;
}
1141 ad69471c pbrook
1142 ad69471c pbrook
/* Absolute difference: convert x and y to "type", then store the larger
   minus the smaller in dest.  The subtraction is carried out in "type"
   after the usual integer promotions, so a 32-bit signed "type" can
   overflow when the operands are far apart.  */
#define DO_ABD(dest, x, y, type) do { \
1143 ad69471c pbrook
    type tmp_x = x; \
1144 ad69471c pbrook
    type tmp_y = y; \
1145 ad69471c pbrook
    dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
1146 ad69471c pbrook
    } while(0)
1147 ad69471c pbrook
1148 ad69471c pbrook
uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
1149 ad69471c pbrook
{
1150 ad69471c pbrook
    uint64_t tmp;
1151 ad69471c pbrook
    uint64_t result;
1152 ad69471c pbrook
    DO_ABD(result, a, b, uint8_t);
1153 ad69471c pbrook
    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
1154 ad69471c pbrook
    result |= tmp << 16;
1155 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
1156 ad69471c pbrook
    result |= tmp << 32;
1157 ad69471c pbrook
    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
1158 ad69471c pbrook
    result |= tmp << 48;
1159 ad69471c pbrook
    return result;
1160 ad69471c pbrook
}
1161 ad69471c pbrook
1162 ad69471c pbrook
uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
1163 ad69471c pbrook
{
1164 ad69471c pbrook
    uint64_t tmp;
1165 ad69471c pbrook
    uint64_t result;
1166 ad69471c pbrook
    DO_ABD(result, a, b, int8_t);
1167 ad69471c pbrook
    DO_ABD(tmp, a >> 8, b >> 8, int8_t);
1168 ad69471c pbrook
    result |= tmp << 16;
1169 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, int8_t);
1170 ad69471c pbrook
    result |= tmp << 32;
1171 ad69471c pbrook
    DO_ABD(tmp, a >> 24, b >> 24, int8_t);
1172 ad69471c pbrook
    result |= tmp << 48;
1173 ad69471c pbrook
    return result;
1174 ad69471c pbrook
}
1175 ad69471c pbrook
1176 ad69471c pbrook
uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
1177 ad69471c pbrook
{
1178 ad69471c pbrook
    uint64_t tmp;
1179 ad69471c pbrook
    uint64_t result;
1180 ad69471c pbrook
    DO_ABD(result, a, b, uint16_t);
1181 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
1182 ad69471c pbrook
    return result | (tmp << 32);
1183 ad69471c pbrook
}
1184 ad69471c pbrook
1185 ad69471c pbrook
uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
1186 ad69471c pbrook
{
1187 ad69471c pbrook
    uint64_t tmp;
1188 ad69471c pbrook
    uint64_t result;
1189 ad69471c pbrook
    DO_ABD(result, a, b, int16_t);
1190 ad69471c pbrook
    DO_ABD(tmp, a >> 16, b >> 16, int16_t);
1191 ad69471c pbrook
    return result | (tmp << 32);
1192 ad69471c pbrook
}
1193 ad69471c pbrook
1194 ad69471c pbrook
uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
1195 ad69471c pbrook
{
1196 ad69471c pbrook
    uint64_t result;
1197 ad69471c pbrook
    DO_ABD(result, a, b, uint32_t);
1198 ad69471c pbrook
    return result;
1199 ad69471c pbrook
}
1200 ad69471c pbrook
1201 ad69471c pbrook
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
1202 ad69471c pbrook
{
1203 ad69471c pbrook
    uint64_t result;
1204 ad69471c pbrook
    DO_ABD(result, a, b, int32_t);
1205 ad69471c pbrook
    return result;
1206 ad69471c pbrook
}
1207 ad69471c pbrook
#undef DO_ABD
1208 ad69471c pbrook
1209 ad69471c pbrook
/* Widening multiply. Named type is the source type.
   x and y are first converted to type1 (the source element type); the
   product is then computed on values cast to type2 and truncated to type2
   (the destination element type).  */
1210 ad69471c pbrook
#define DO_MULL(dest, x, y, type1, type2) do { \
1211 ad69471c pbrook
    type1 tmp_x = x; \
1212 ad69471c pbrook
    type1 tmp_y = y; \
1213 ad69471c pbrook
    dest = (type2)((type2)tmp_x * (type2)tmp_y); \
1214 ad69471c pbrook
    } while(0)
1215 ad69471c pbrook
1216 ad69471c pbrook
uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b)
1217 ad69471c pbrook
{
1218 ad69471c pbrook
    uint64_t tmp;
1219 ad69471c pbrook
    uint64_t result;
1220 ad69471c pbrook
1221 ad69471c pbrook
    DO_MULL(result, a, b, uint8_t, uint16_t);
1222 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t);
1223 ad69471c pbrook
    result |= tmp << 16;
1224 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t);
1225 ad69471c pbrook
    result |= tmp << 32;
1226 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t);
1227 ad69471c pbrook
    result |= tmp << 48;
1228 ad69471c pbrook
    return result;
1229 ad69471c pbrook
}
1230 ad69471c pbrook
1231 ad69471c pbrook
uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b)
1232 ad69471c pbrook
{
1233 ad69471c pbrook
    uint64_t tmp;
1234 ad69471c pbrook
    uint64_t result;
1235 ad69471c pbrook
1236 ad69471c pbrook
    DO_MULL(result, a, b, int8_t, uint16_t);
1237 ad69471c pbrook
    DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t);
1238 ad69471c pbrook
    result |= tmp << 16;
1239 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t);
1240 ad69471c pbrook
    result |= tmp << 32;
1241 ad69471c pbrook
    DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t);
1242 ad69471c pbrook
    result |= tmp << 48;
1243 ad69471c pbrook
    return result;
1244 ad69471c pbrook
}
1245 ad69471c pbrook
1246 ad69471c pbrook
uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b)
1247 ad69471c pbrook
{
1248 ad69471c pbrook
    uint64_t tmp;
1249 ad69471c pbrook
    uint64_t result;
1250 ad69471c pbrook
1251 ad69471c pbrook
    DO_MULL(result, a, b, uint16_t, uint32_t);
1252 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
1253 ad69471c pbrook
    return result | (tmp << 32);
1254 ad69471c pbrook
}
1255 ad69471c pbrook
1256 ad69471c pbrook
uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b)
1257 ad69471c pbrook
{
1258 ad69471c pbrook
    uint64_t tmp;
1259 ad69471c pbrook
    uint64_t result;
1260 ad69471c pbrook
1261 ad69471c pbrook
    DO_MULL(result, a, b, int16_t, uint32_t);
1262 ad69471c pbrook
    DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t);
1263 ad69471c pbrook
    return result | (tmp << 32);
1264 ad69471c pbrook
}
1265 ad69471c pbrook
1266 ad69471c pbrook
uint64_t HELPER(neon_negl_u16)(uint64_t x)
1267 ad69471c pbrook
{
1268 ad69471c pbrook
    uint16_t tmp;
1269 ad69471c pbrook
    uint64_t result;
1270 ad69471c pbrook
    result = (uint16_t)-x;
1271 ad69471c pbrook
    tmp = -(x >> 16);
1272 ad69471c pbrook
    result |= (uint64_t)tmp << 16;
1273 ad69471c pbrook
    tmp = -(x >> 32);
1274 ad69471c pbrook
    result |= (uint64_t)tmp << 32;
1275 ad69471c pbrook
    tmp = -(x >> 48);
1276 ad69471c pbrook
    result |= (uint64_t)tmp << 48;
1277 ad69471c pbrook
    return result;
1278 ad69471c pbrook
}
1279 ad69471c pbrook
1280 ad69471c pbrook
#include <stdio.h>
1281 ad69471c pbrook
uint64_t HELPER(neon_negl_u32)(uint64_t x)
1282 ad69471c pbrook
{
1283 ad69471c pbrook
    uint32_t low = -x;
1284 ad69471c pbrook
    uint32_t high = -(x >> 32);
1285 ad69471c pbrook
    return low | ((uint64_t)high << 32);
1286 ad69471c pbrook
}
1287 ad69471c pbrook
1288 ad69471c pbrook
/* FIXME:  There should be a native op for this.  */
1289 ad69471c pbrook
uint64_t HELPER(neon_negl_u64)(uint64_t x)
1290 ad69471c pbrook
{
1291 ad69471c pbrook
    return -x;
1292 ad69471c pbrook
}
1293 ad69471c pbrook
1294 ad69471c pbrook
/* Saturating sign manipulation.  */
1295 ad69471c pbrook
/* ??? Make these use NEON_VOP1 */
1296 ad69471c pbrook
#define DO_QABS8(x) do { \
1297 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1298 ad69471c pbrook
        x = 0x7f; \
1299 ad69471c pbrook
        SET_QC(); \
1300 ad69471c pbrook
    } else if (x < 0) { \
1301 ad69471c pbrook
        x = -x; \
1302 ad69471c pbrook
    }} while (0)
1303 ad69471c pbrook
uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x)
1304 ad69471c pbrook
{
1305 ad69471c pbrook
    neon_s8 vec;
1306 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1307 ad69471c pbrook
    DO_QABS8(vec.v1);
1308 ad69471c pbrook
    DO_QABS8(vec.v2);
1309 ad69471c pbrook
    DO_QABS8(vec.v3);
1310 ad69471c pbrook
    DO_QABS8(vec.v4);
1311 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1312 ad69471c pbrook
    return x;
1313 ad69471c pbrook
}
1314 ad69471c pbrook
#undef DO_QABS8
1315 ad69471c pbrook
1316 ad69471c pbrook
#define DO_QNEG8(x) do { \
1317 ad69471c pbrook
    if (x == (int8_t)0x80) { \
1318 ad69471c pbrook
        x = 0x7f; \
1319 ad69471c pbrook
        SET_QC(); \
1320 ad69471c pbrook
    } else { \
1321 ad69471c pbrook
        x = -x; \
1322 ad69471c pbrook
    }} while (0)
1323 ad69471c pbrook
uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x)
1324 ad69471c pbrook
{
1325 ad69471c pbrook
    neon_s8 vec;
1326 ad69471c pbrook
    NEON_UNPACK(neon_s8, vec, x);
1327 ad69471c pbrook
    DO_QNEG8(vec.v1);
1328 ad69471c pbrook
    DO_QNEG8(vec.v2);
1329 ad69471c pbrook
    DO_QNEG8(vec.v3);
1330 ad69471c pbrook
    DO_QNEG8(vec.v4);
1331 ad69471c pbrook
    NEON_PACK(neon_s8, x, vec);
1332 ad69471c pbrook
    return x;
1333 ad69471c pbrook
}
1334 ad69471c pbrook
#undef DO_QNEG8
1335 ad69471c pbrook
1336 ad69471c pbrook
#define DO_QABS16(x) do { \
1337 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1338 ad69471c pbrook
        x = 0x7fff; \
1339 ad69471c pbrook
        SET_QC(); \
1340 ad69471c pbrook
    } else if (x < 0) { \
1341 ad69471c pbrook
        x = -x; \
1342 ad69471c pbrook
    }} while (0)
1343 ad69471c pbrook
uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x)
1344 ad69471c pbrook
{
1345 ad69471c pbrook
    neon_s16 vec;
1346 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1347 ad69471c pbrook
    DO_QABS16(vec.v1);
1348 ad69471c pbrook
    DO_QABS16(vec.v2);
1349 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1350 ad69471c pbrook
    return x;
1351 ad69471c pbrook
}
1352 ad69471c pbrook
#undef DO_QABS16
1353 ad69471c pbrook
1354 ad69471c pbrook
#define DO_QNEG16(x) do { \
1355 ad69471c pbrook
    if (x == (int16_t)0x8000) { \
1356 ad69471c pbrook
        x = 0x7fff; \
1357 ad69471c pbrook
        SET_QC(); \
1358 ad69471c pbrook
    } else { \
1359 ad69471c pbrook
        x = -x; \
1360 ad69471c pbrook
    }} while (0)
1361 ad69471c pbrook
uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x)
1362 ad69471c pbrook
{
1363 ad69471c pbrook
    neon_s16 vec;
1364 ad69471c pbrook
    NEON_UNPACK(neon_s16, vec, x);
1365 ad69471c pbrook
    DO_QNEG16(vec.v1);
1366 ad69471c pbrook
    DO_QNEG16(vec.v2);
1367 ad69471c pbrook
    NEON_PACK(neon_s16, x, vec);
1368 ad69471c pbrook
    return x;
1369 ad69471c pbrook
}
1370 ad69471c pbrook
#undef DO_QNEG16
1371 ad69471c pbrook
1372 ad69471c pbrook
/* Saturating absolute value of a signed 32-bit value: the most negative
   value becomes 0x7fffffff and invokes SET_QC().  */
uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x)
{
    if (x == SIGNBIT) {
        SET_QC();
        return ~SIGNBIT;
    }
    if ((int32_t)x < 0) {
        return -x;
    }
    return x;
}
1382 ad69471c pbrook
1383 ad69471c pbrook
/* Saturating negation of a signed 32-bit value: the most negative value
   becomes 0x7fffffff and invokes SET_QC().  */
uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x)
{
    if (x != SIGNBIT) {
        return -x;
    }
    SET_QC();
    return ~SIGNBIT;
}
1393 ad69471c pbrook
1394 ad69471c pbrook
/* NEON Float helpers.  */
1395 ad69471c pbrook
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b)
1396 ad69471c pbrook
{
1397 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1398 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1399 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b;
1400 ad69471c pbrook
}
1401 ad69471c pbrook
1402 ad69471c pbrook
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b)
1403 ad69471c pbrook
{
1404 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1405 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1406 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b;
1407 ad69471c pbrook
}
1408 ad69471c pbrook
1409 ad69471c pbrook
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b)
1410 ad69471c pbrook
{
1411 ad69471c pbrook
    float32 f0 = vfp_itos(a);
1412 ad69471c pbrook
    float32 f1 = vfp_itos(b);
1413 ad69471c pbrook
    return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1)
1414 ad69471c pbrook
                    ? float32_sub(f0, f1, NFS)
1415 ad69471c pbrook
                    : float32_sub(f1, f0, NFS));
1416 ad69471c pbrook
}
1417 ad69471c pbrook
1418 ad69471c pbrook
uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b)
1419 ad69471c pbrook
{
1420 ad69471c pbrook
    return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
1421 ad69471c pbrook
}
1422 ad69471c pbrook
1423 ad69471c pbrook
uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b)
1424 ad69471c pbrook
{
1425 ad69471c pbrook
    return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
1426 ad69471c pbrook
}
1427 ad69471c pbrook
1428 ad69471c pbrook
uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
1429 ad69471c pbrook
{
1430 ad69471c pbrook
    return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
1431 ad69471c pbrook
}
1432 ad69471c pbrook
1433 ad69471c pbrook
/* Floating point comparisons produce an integer result.  */
1434 ad69471c pbrook
#define NEON_VOP_FCMP(name, cmp) \
1435 ad69471c pbrook
uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \
1436 ad69471c pbrook
{ \
1437 ad69471c pbrook
    if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \
1438 ad69471c pbrook
        return ~0; \
1439 ad69471c pbrook
    else \
1440 ad69471c pbrook
        return 0; \
1441 ad69471c pbrook
}
1442 ad69471c pbrook
1443 ad69471c pbrook
NEON_VOP_FCMP(ceq_f32, ==)
1444 ad69471c pbrook
NEON_VOP_FCMP(cge_f32, >=)
1445 ad69471c pbrook
NEON_VOP_FCMP(cgt_f32, >)
1446 ad69471c pbrook
1447 ad69471c pbrook
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
1448 ad69471c pbrook
{
1449 ad69471c pbrook
    float32 f0 = float32_abs(vfp_itos(a));
1450 ad69471c pbrook
    float32 f1 = float32_abs(vfp_itos(b));
1451 ad69471c pbrook
    return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0;
1452 ad69471c pbrook
}
1453 ad69471c pbrook
1454 ad69471c pbrook
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
1455 ad69471c pbrook
{
1456 ad69471c pbrook
    float32 f0 = float32_abs(vfp_itos(a));
1457 ad69471c pbrook
    float32 f1 = float32_abs(vfp_itos(b));
1458 ad69471c pbrook
    return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0;
1459 ad69471c pbrook
}