root / target-arm / neon_helper.c @ e00c1e71
History | View | Annotate | Download (34.2 kB)
/*
 * ARM NEON vector operations.
 *
 * Copyright (c) 2007, 2008 CodeSourcery.
 * Written by Paul Brook
 *
 * This code is licenced under the GNU GPL v2.
 */
|
9 | ad69471c | pbrook | #include <stdlib.h> |
10 | ad69471c | pbrook | #include <stdio.h> |
11 | ad69471c | pbrook | |
12 | ad69471c | pbrook | #include "cpu.h" |
13 | ad69471c | pbrook | #include "exec-all.h" |
14 | ad69471c | pbrook | #include "helpers.h" |
15 | ad69471c | pbrook | |
/* Top bit of a 32-bit and of a 64-bit value.  Fully parenthesized so the
   macros can be used inside any larger expression.  */
#define SIGNBIT ((uint32_t)0x80000000)
#define SIGNBIT64 ((uint64_t)1 << 63)

/* Flag that a saturating operation overflowed.  The CPSR_Q mask is ORed
   into the FPSCR image so that the other bits held in that register are
   preserved; a plain assignment would wipe them.  Requires a variable
   named env to be in scope at the point of use.
   NOTE(review): relies on CPSR_Q sharing its bit position with the FPSCR
   saturation flag - confirm against cpu.h.  */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
|
20 | ad69471c | pbrook | |
21 | ad69471c | pbrook | static float_status neon_float_status;
|
22 | ad69471c | pbrook | #define NFS &neon_float_status
|
23 | ad69471c | pbrook | |
24 | ad69471c | pbrook | /* Helper routines to perform bitwise copies between float and int. */
|
25 | ad69471c | pbrook | static inline float32 vfp_itos(uint32_t i) |
26 | ad69471c | pbrook | { |
27 | ad69471c | pbrook | union {
|
28 | ad69471c | pbrook | uint32_t i; |
29 | ad69471c | pbrook | float32 s; |
30 | ad69471c | pbrook | } v; |
31 | ad69471c | pbrook | |
32 | ad69471c | pbrook | v.i = i; |
33 | ad69471c | pbrook | return v.s;
|
34 | ad69471c | pbrook | } |
35 | ad69471c | pbrook | |
36 | ad69471c | pbrook | static inline uint32_t vfp_stoi(float32 s) |
37 | ad69471c | pbrook | { |
38 | ad69471c | pbrook | union {
|
39 | ad69471c | pbrook | uint32_t i; |
40 | ad69471c | pbrook | float32 s; |
41 | ad69471c | pbrook | } v; |
42 | ad69471c | pbrook | |
43 | ad69471c | pbrook | v.s = s; |
44 | ad69471c | pbrook | return v.i;
|
45 | ad69471c | pbrook | } |
46 | ad69471c | pbrook | |
/* Structure views of a 32-bit word holding one, two or four lanes.  The
   fields are declared in reverse order when WORDS_BIGENDIAN is defined,
   so a given lane name refers to the same part of the word on either
   kind of host.  */
#ifdef WORDS_BIGENDIAN
#define NEON_LANES2(type) type v2; type v1;
#define NEON_LANES4(type) type v4; type v3; type v2; type v1;
#else
#define NEON_LANES2(type) type v1; type v2;
#define NEON_LANES4(type) type v1; type v2; type v3; type v4;
#endif

#define NEON_TYPE1(name, type) \
typedef struct \
{ \
    type v1; \
} neon_##name;
#define NEON_TYPE2(name, type) \
typedef struct \
{ \
    NEON_LANES2(type) \
} neon_##name;
#define NEON_TYPE4(name, type) \
typedef struct \
{ \
    NEON_LANES4(type) \
} neon_##name;

NEON_TYPE4(s8, int8_t)
NEON_TYPE4(u8, uint8_t)
NEON_TYPE2(s16, int16_t)
NEON_TYPE2(u16, uint16_t)
NEON_TYPE1(s32, int32_t)
NEON_TYPE1(u32, uint32_t)

/* The generator macros are only needed for the typedefs above.  */
#undef NEON_TYPE4
#undef NEON_TYPE2
#undef NEON_TYPE1
#undef NEON_LANES4
#undef NEON_LANES2
|
93 | ad69471c | pbrook | |
/* Reinterpret the 32-bit word val as the lane structure vtype and store
   the result in dest.  The copy goes through a union, so the bits are
   carried over unchanged.  */
#define NEON_UNPACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.word = (val); \
    dest = pun.lanes; \
} while(0)
103 | ad69471c | pbrook | |
/* Reinterpret the lane structure val (of type vtype) as a 32-bit word and
   store it in dest.  This is the inverse of NEON_UNPACK.  */
#define NEON_PACK(vtype, dest, val) do { \
    union { \
        uint32_t word; \
        vtype lanes; \
    } pun; \
    pun.lanes = (val); \
    dest = pun.word; \
} while(0)
113 | ad69471c | pbrook | |
/* Apply the current NEON_FN lane by lane: lane k of vsrc1 and vsrc2 is
   combined into lane k of vdest.  The suffix is the number of lanes; the
   wider forms are built on top of the narrower ones.  */
#define NEON_DO1 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
#define NEON_DO2 \
    NEON_DO1 \
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
#define NEON_DO4 \
    NEON_DO2 \
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
124 | ad69471c | pbrook | |
/* Function body shared by the two-operand helpers: unpack arg1 and arg2
   into lane structures of type vtype, run NEON_FN over the n lanes, then
   repack the lanes and return them as a 32-bit word.  */
#define NEON_VOP_BODY(vtype, n) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_DO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
137 | ad69471c | pbrook | |
/* Define the helper neon_<name>, which takes two packed 32-bit operands
   and applies the current NEON_FN to each of their n lanes.  */
#define NEON_VOP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)

/* Same as NEON_VOP, except that the helper also takes the CPU state as
   env, for NEON_FN bodies that refer to it (for example via SET_QC).  */
#define NEON_VOP_ENV(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
145 | ad69471c | pbrook | |
/* Pairwise operations.  */
/* NEON_FN is applied to adjacent lanes of the same source: the results
   for vsrc1 fill the low lanes of vdest and the results for vsrc2 fill
   the high lanes.
   For 32-bit elements each segment only contains a single element, so
   the elementwise and pairwise operations are the same.  */
#define NEON_PDO2 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
/* No continuation backslash on the last line: it would splice whatever
   follows the definition into the macro.  */
#define NEON_PDO4 \
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4);
157 | ad69471c | pbrook | |
/* Define the pairwise helper neon_<name>: unpack both 32-bit operands,
   combine adjacent lanes with NEON_FN (through NEON_PDO<n>), and return
   the repacked result.  */
#define NEON_POP(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
{ \
    vtype vsrc1; \
    vtype vsrc2; \
    vtype vdest; \
    uint32_t packed; \
    NEON_UNPACK(vtype, vsrc1, arg1); \
    NEON_UNPACK(vtype, vsrc2, arg2); \
    NEON_PDO##n; \
    NEON_PACK(vtype, packed, vdest); \
    return packed; \
}
171 | ad69471c | pbrook | |
/* Unary operators.  */
/* Define the single-operand helper neon_<name>.  The lane loop is the
   same NEON_DO<n> used by the binary helpers, which also names vsrc2;
   no vsrc2 exists here, so the NEON_FN in effect must not expand its
   third argument.  The packed result is written back into arg and
   returned.  */
#define NEON_VOP1(name, vtype, n) \
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
{ \
    vtype vdest; \
    vtype vsrc1; \
    NEON_UNPACK(vtype, vsrc1, arg); \
    NEON_DO##n; \
    NEON_PACK(vtype, arg, vdest); \
    return arg; \
}
183 | ad69471c | pbrook | |
184 | ad69471c | pbrook | |
185 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
186 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
187 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
188 | ad69471c | pbrook | SET_QC(); \ |
189 | ad69471c | pbrook | dest = ~0; \
|
190 | ad69471c | pbrook | } else { \
|
191 | ad69471c | pbrook | dest = tmp; \ |
192 | ad69471c | pbrook | }} while(0) |
193 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
194 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u8, neon_u8, 4)
|
195 | ad69471c | pbrook | #undef NEON_FN
|
196 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
197 | ad69471c | pbrook | NEON_VOP_ENV(qadd_u16, neon_u16, 2)
|
198 | ad69471c | pbrook | #undef NEON_FN
|
199 | ad69471c | pbrook | #undef NEON_USAT
|
200 | ad69471c | pbrook | |
201 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
202 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ |
203 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
204 | ad69471c | pbrook | SET_QC(); \ |
205 | ad69471c | pbrook | if (src2 > 0) { \ |
206 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
207 | ad69471c | pbrook | } else { \
|
208 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
209 | ad69471c | pbrook | } \ |
210 | ad69471c | pbrook | } \ |
211 | ad69471c | pbrook | dest = tmp; \ |
212 | ad69471c | pbrook | } while(0) |
213 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
214 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s8, neon_s8, 4)
|
215 | ad69471c | pbrook | #undef NEON_FN
|
216 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
217 | ad69471c | pbrook | NEON_VOP_ENV(qadd_s16, neon_s16, 2)
|
218 | ad69471c | pbrook | #undef NEON_FN
|
219 | ad69471c | pbrook | #undef NEON_SSAT
|
220 | ad69471c | pbrook | |
221 | ad69471c | pbrook | #define NEON_USAT(dest, src1, src2, type) do { \ |
222 | ad69471c | pbrook | uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
223 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
224 | ad69471c | pbrook | SET_QC(); \ |
225 | ad69471c | pbrook | dest = 0; \
|
226 | ad69471c | pbrook | } else { \
|
227 | ad69471c | pbrook | dest = tmp; \ |
228 | ad69471c | pbrook | }} while(0) |
229 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
230 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u8, neon_u8, 4)
|
231 | ad69471c | pbrook | #undef NEON_FN
|
232 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
233 | ad69471c | pbrook | NEON_VOP_ENV(qsub_u16, neon_u16, 2)
|
234 | ad69471c | pbrook | #undef NEON_FN
|
235 | ad69471c | pbrook | #undef NEON_USAT
|
236 | ad69471c | pbrook | |
237 | ad69471c | pbrook | #define NEON_SSAT(dest, src1, src2, type) do { \ |
238 | ad69471c | pbrook | int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ |
239 | ad69471c | pbrook | if (tmp != (type)tmp) { \
|
240 | ad69471c | pbrook | SET_QC(); \ |
241 | ad69471c | pbrook | if (src2 < 0) { \ |
242 | ad69471c | pbrook | tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ |
243 | ad69471c | pbrook | } else { \
|
244 | ad69471c | pbrook | tmp = 1 << (sizeof(type) * 8 - 1); \ |
245 | ad69471c | pbrook | } \ |
246 | ad69471c | pbrook | } \ |
247 | ad69471c | pbrook | dest = tmp; \ |
248 | ad69471c | pbrook | } while(0) |
249 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
250 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s8, neon_s8, 4)
|
251 | ad69471c | pbrook | #undef NEON_FN
|
252 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
253 | ad69471c | pbrook | NEON_VOP_ENV(qsub_s16, neon_s16, 2)
|
254 | ad69471c | pbrook | #undef NEON_FN
|
255 | ad69471c | pbrook | #undef NEON_SSAT
|
256 | ad69471c | pbrook | |
257 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 |
258 | ad69471c | pbrook | NEON_VOP(hadd_s8, neon_s8, 4)
|
259 | ad69471c | pbrook | NEON_VOP(hadd_u8, neon_u8, 4)
|
260 | ad69471c | pbrook | NEON_VOP(hadd_s16, neon_s16, 2)
|
261 | ad69471c | pbrook | NEON_VOP(hadd_u16, neon_u16, 2)
|
262 | ad69471c | pbrook | #undef NEON_FN
|
263 | ad69471c | pbrook | |
264 | ad69471c | pbrook | int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) |
265 | ad69471c | pbrook | { |
266 | ad69471c | pbrook | int32_t dest; |
267 | ad69471c | pbrook | |
268 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
269 | ad69471c | pbrook | if (src1 & src2 & 1) |
270 | ad69471c | pbrook | dest++; |
271 | ad69471c | pbrook | return dest;
|
272 | ad69471c | pbrook | } |
273 | ad69471c | pbrook | |
274 | ad69471c | pbrook | uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) |
275 | ad69471c | pbrook | { |
276 | ad69471c | pbrook | uint32_t dest; |
277 | ad69471c | pbrook | |
278 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
279 | ad69471c | pbrook | if (src1 & src2 & 1) |
280 | ad69471c | pbrook | dest++; |
281 | ad69471c | pbrook | return dest;
|
282 | ad69471c | pbrook | } |
283 | ad69471c | pbrook | |
284 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 |
285 | ad69471c | pbrook | NEON_VOP(rhadd_s8, neon_s8, 4)
|
286 | ad69471c | pbrook | NEON_VOP(rhadd_u8, neon_u8, 4)
|
287 | ad69471c | pbrook | NEON_VOP(rhadd_s16, neon_s16, 2)
|
288 | ad69471c | pbrook | NEON_VOP(rhadd_u16, neon_u16, 2)
|
289 | ad69471c | pbrook | #undef NEON_FN
|
290 | ad69471c | pbrook | |
291 | ad69471c | pbrook | int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) |
292 | ad69471c | pbrook | { |
293 | ad69471c | pbrook | int32_t dest; |
294 | ad69471c | pbrook | |
295 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
296 | ad69471c | pbrook | if ((src1 | src2) & 1) |
297 | ad69471c | pbrook | dest++; |
298 | ad69471c | pbrook | return dest;
|
299 | ad69471c | pbrook | } |
300 | ad69471c | pbrook | |
301 | ad69471c | pbrook | uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) |
302 | ad69471c | pbrook | { |
303 | ad69471c | pbrook | uint32_t dest; |
304 | ad69471c | pbrook | |
305 | ad69471c | pbrook | dest = (src1 >> 1) + (src2 >> 1); |
306 | ad69471c | pbrook | if ((src1 | src2) & 1) |
307 | ad69471c | pbrook | dest++; |
308 | ad69471c | pbrook | return dest;
|
309 | ad69471c | pbrook | } |
310 | ad69471c | pbrook | |
311 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 |
312 | ad69471c | pbrook | NEON_VOP(hsub_s8, neon_s8, 4)
|
313 | ad69471c | pbrook | NEON_VOP(hsub_u8, neon_u8, 4)
|
314 | ad69471c | pbrook | NEON_VOP(hsub_s16, neon_s16, 2)
|
315 | ad69471c | pbrook | NEON_VOP(hsub_u16, neon_u16, 2)
|
316 | ad69471c | pbrook | #undef NEON_FN
|
317 | ad69471c | pbrook | |
318 | ad69471c | pbrook | int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) |
319 | ad69471c | pbrook | { |
320 | ad69471c | pbrook | int32_t dest; |
321 | ad69471c | pbrook | |
322 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
323 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
324 | ad69471c | pbrook | dest--; |
325 | ad69471c | pbrook | return dest;
|
326 | ad69471c | pbrook | } |
327 | ad69471c | pbrook | |
328 | ad69471c | pbrook | uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) |
329 | ad69471c | pbrook | { |
330 | ad69471c | pbrook | uint32_t dest; |
331 | ad69471c | pbrook | |
332 | ad69471c | pbrook | dest = (src1 >> 1) - (src2 >> 1); |
333 | ad69471c | pbrook | if ((~src1) & src2 & 1) |
334 | ad69471c | pbrook | dest--; |
335 | ad69471c | pbrook | return dest;
|
336 | ad69471c | pbrook | } |
337 | ad69471c | pbrook | |
338 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 |
339 | ad69471c | pbrook | NEON_VOP(cgt_s8, neon_s8, 4)
|
340 | ad69471c | pbrook | NEON_VOP(cgt_u8, neon_u8, 4)
|
341 | ad69471c | pbrook | NEON_VOP(cgt_s16, neon_s16, 2)
|
342 | ad69471c | pbrook | NEON_VOP(cgt_u16, neon_u16, 2)
|
343 | ad69471c | pbrook | NEON_VOP(cgt_s32, neon_s32, 1)
|
344 | ad69471c | pbrook | NEON_VOP(cgt_u32, neon_u32, 1)
|
345 | ad69471c | pbrook | #undef NEON_FN
|
346 | ad69471c | pbrook | |
347 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 |
348 | ad69471c | pbrook | NEON_VOP(cge_s8, neon_s8, 4)
|
349 | ad69471c | pbrook | NEON_VOP(cge_u8, neon_u8, 4)
|
350 | ad69471c | pbrook | NEON_VOP(cge_s16, neon_s16, 2)
|
351 | ad69471c | pbrook | NEON_VOP(cge_u16, neon_u16, 2)
|
352 | ad69471c | pbrook | NEON_VOP(cge_s32, neon_s32, 1)
|
353 | ad69471c | pbrook | NEON_VOP(cge_u32, neon_u32, 1)
|
354 | ad69471c | pbrook | #undef NEON_FN
|
355 | ad69471c | pbrook | |
356 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
|
357 | ad69471c | pbrook | NEON_VOP(min_s8, neon_s8, 4)
|
358 | ad69471c | pbrook | NEON_VOP(min_u8, neon_u8, 4)
|
359 | ad69471c | pbrook | NEON_VOP(min_s16, neon_s16, 2)
|
360 | ad69471c | pbrook | NEON_VOP(min_u16, neon_u16, 2)
|
361 | ad69471c | pbrook | NEON_VOP(min_s32, neon_s32, 1)
|
362 | ad69471c | pbrook | NEON_VOP(min_u32, neon_u32, 1)
|
363 | ad69471c | pbrook | NEON_POP(pmin_s8, neon_s8, 4)
|
364 | ad69471c | pbrook | NEON_POP(pmin_u8, neon_u8, 4)
|
365 | ad69471c | pbrook | NEON_POP(pmin_s16, neon_s16, 2)
|
366 | ad69471c | pbrook | NEON_POP(pmin_u16, neon_u16, 2)
|
367 | ad69471c | pbrook | #undef NEON_FN
|
368 | ad69471c | pbrook | |
369 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
|
370 | ad69471c | pbrook | NEON_VOP(max_s8, neon_s8, 4)
|
371 | ad69471c | pbrook | NEON_VOP(max_u8, neon_u8, 4)
|
372 | ad69471c | pbrook | NEON_VOP(max_s16, neon_s16, 2)
|
373 | ad69471c | pbrook | NEON_VOP(max_u16, neon_u16, 2)
|
374 | ad69471c | pbrook | NEON_VOP(max_s32, neon_s32, 1)
|
375 | ad69471c | pbrook | NEON_VOP(max_u32, neon_u32, 1)
|
376 | ad69471c | pbrook | NEON_POP(pmax_s8, neon_s8, 4)
|
377 | ad69471c | pbrook | NEON_POP(pmax_u8, neon_u8, 4)
|
378 | ad69471c | pbrook | NEON_POP(pmax_s16, neon_s16, 2)
|
379 | ad69471c | pbrook | NEON_POP(pmax_u16, neon_u16, 2)
|
380 | ad69471c | pbrook | #undef NEON_FN
|
381 | ad69471c | pbrook | |
382 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) \
|
383 | ad69471c | pbrook | dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) |
384 | ad69471c | pbrook | NEON_VOP(abd_s8, neon_s8, 4)
|
385 | ad69471c | pbrook | NEON_VOP(abd_u8, neon_u8, 4)
|
386 | ad69471c | pbrook | NEON_VOP(abd_s16, neon_s16, 2)
|
387 | ad69471c | pbrook | NEON_VOP(abd_u16, neon_u16, 2)
|
388 | ad69471c | pbrook | NEON_VOP(abd_s32, neon_s32, 1)
|
389 | ad69471c | pbrook | NEON_VOP(abd_u32, neon_u32, 1)
|
390 | ad69471c | pbrook | #undef NEON_FN
|
391 | ad69471c | pbrook | |
392 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
393 | ad69471c | pbrook | int8_t tmp; \ |
394 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
395 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ |
396 | ad69471c | pbrook | dest = 0; \
|
397 | ad69471c | pbrook | } else if (tmp < 0) { \ |
398 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
399 | ad69471c | pbrook | } else { \
|
400 | ad69471c | pbrook | dest = src1 << tmp; \ |
401 | ad69471c | pbrook | }} while (0) |
402 | ad69471c | pbrook | NEON_VOP(shl_u8, neon_u8, 4)
|
403 | ad69471c | pbrook | NEON_VOP(shl_u16, neon_u16, 2)
|
404 | ad69471c | pbrook | NEON_VOP(shl_u32, neon_u32, 1)
|
405 | ad69471c | pbrook | #undef NEON_FN
|
406 | ad69471c | pbrook | |
407 | ad69471c | pbrook | uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) |
408 | ad69471c | pbrook | { |
409 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
410 | ad69471c | pbrook | if (shift >= 64 || shift <= -64) { |
411 | ad69471c | pbrook | val = 0;
|
412 | ad69471c | pbrook | } else if (shift < 0) { |
413 | ad69471c | pbrook | val >>= -shift; |
414 | ad69471c | pbrook | } else {
|
415 | ad69471c | pbrook | val <<= shift; |
416 | ad69471c | pbrook | } |
417 | ad69471c | pbrook | return val;
|
418 | ad69471c | pbrook | } |
419 | ad69471c | pbrook | |
420 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
421 | ad69471c | pbrook | int8_t tmp; \ |
422 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
423 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8) { \ |
424 | ad69471c | pbrook | dest = 0; \
|
425 | ad69471c | pbrook | } else if (tmp <= -sizeof(src1) * 8) { \ |
426 | ad69471c | pbrook | dest = src1 >> (sizeof(src1) * 8 - 1); \ |
427 | ad69471c | pbrook | } else if (tmp < 0) { \ |
428 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
429 | ad69471c | pbrook | } else { \
|
430 | ad69471c | pbrook | dest = src1 << tmp; \ |
431 | ad69471c | pbrook | }} while (0) |
432 | ad69471c | pbrook | NEON_VOP(shl_s8, neon_s8, 4)
|
433 | ad69471c | pbrook | NEON_VOP(shl_s16, neon_s16, 2)
|
434 | ad69471c | pbrook | NEON_VOP(shl_s32, neon_s32, 1)
|
435 | ad69471c | pbrook | #undef NEON_FN
|
436 | ad69471c | pbrook | |
437 | ad69471c | pbrook | uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) |
438 | ad69471c | pbrook | { |
439 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
440 | ad69471c | pbrook | int64_t val = valop; |
441 | ad69471c | pbrook | if (shift >= 64) { |
442 | ad69471c | pbrook | val = 0;
|
443 | ad69471c | pbrook | } else if (shift <= -64) { |
444 | ad69471c | pbrook | val >>= 63;
|
445 | ad69471c | pbrook | } else if (shift < 0) { |
446 | ad69471c | pbrook | val >>= -shift; |
447 | ad69471c | pbrook | } else {
|
448 | ad69471c | pbrook | val <<= shift; |
449 | ad69471c | pbrook | } |
450 | ad69471c | pbrook | return val;
|
451 | ad69471c | pbrook | } |
452 | ad69471c | pbrook | |
453 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
454 | ad69471c | pbrook | int8_t tmp; \ |
455 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
456 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8) { \ |
457 | ad69471c | pbrook | dest = 0; \
|
458 | ad69471c | pbrook | } else if (tmp < -sizeof(src1) * 8) { \ |
459 | ad69471c | pbrook | dest >>= sizeof(src1) * 8 - 1; \ |
460 | ad69471c | pbrook | } else if (tmp == -sizeof(src1) * 8) { \ |
461 | ad69471c | pbrook | dest = src1 >> (tmp - 1); \
|
462 | ad69471c | pbrook | dest++; \ |
463 | ad69471c | pbrook | src2 >>= 1; \
|
464 | ad69471c | pbrook | } else if (tmp < 0) { \ |
465 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
466 | ad69471c | pbrook | } else { \
|
467 | ad69471c | pbrook | dest = src1 << tmp; \ |
468 | ad69471c | pbrook | }} while (0) |
469 | ad69471c | pbrook | NEON_VOP(rshl_s8, neon_s8, 4)
|
470 | ad69471c | pbrook | NEON_VOP(rshl_s16, neon_s16, 2)
|
471 | ad69471c | pbrook | NEON_VOP(rshl_s32, neon_s32, 1)
|
472 | ad69471c | pbrook | #undef NEON_FN
|
473 | ad69471c | pbrook | |
474 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) |
475 | ad69471c | pbrook | { |
476 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
477 | ad69471c | pbrook | int64_t val = valop; |
478 | ad69471c | pbrook | if (shift >= 64) { |
479 | ad69471c | pbrook | val = 0;
|
480 | ad69471c | pbrook | } else if (shift < -64) { |
481 | ad69471c | pbrook | val >>= 63;
|
482 | ad69471c | pbrook | } else if (shift == -63) { |
483 | ad69471c | pbrook | val >>= 63;
|
484 | ad69471c | pbrook | val++; |
485 | ad69471c | pbrook | val >>= 1;
|
486 | ad69471c | pbrook | } else if (shift < 0) { |
487 | ad69471c | pbrook | val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; |
488 | ad69471c | pbrook | } else {
|
489 | ad69471c | pbrook | val <<= shift; |
490 | ad69471c | pbrook | } |
491 | ad69471c | pbrook | return val;
|
492 | ad69471c | pbrook | } |
493 | ad69471c | pbrook | |
494 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
495 | ad69471c | pbrook | int8_t tmp; \ |
496 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
497 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ |
498 | ad69471c | pbrook | dest = 0; \
|
499 | ad69471c | pbrook | } else if (tmp == -sizeof(src1) * 8) { \ |
500 | ad69471c | pbrook | dest = src1 >> (tmp - 1); \
|
501 | ad69471c | pbrook | } else if (tmp < 0) { \ |
502 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
503 | ad69471c | pbrook | } else { \
|
504 | ad69471c | pbrook | dest = src1 << tmp; \ |
505 | ad69471c | pbrook | }} while (0) |
506 | ad69471c | pbrook | NEON_VOP(rshl_u8, neon_u8, 4)
|
507 | ad69471c | pbrook | NEON_VOP(rshl_u16, neon_u16, 2)
|
508 | ad69471c | pbrook | NEON_VOP(rshl_u32, neon_u32, 1)
|
509 | ad69471c | pbrook | #undef NEON_FN
|
510 | ad69471c | pbrook | |
511 | ad69471c | pbrook | uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) |
512 | ad69471c | pbrook | { |
513 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
514 | ad69471c | pbrook | if (shift >= 64 || shift < 64) { |
515 | ad69471c | pbrook | val = 0;
|
516 | ad69471c | pbrook | } else if (shift == -64) { |
517 | ad69471c | pbrook | /* Rounding a 1-bit result just preserves that bit. */
|
518 | ad69471c | pbrook | val >>= 63;
|
519 | ad69471c | pbrook | } if (shift < 0) { |
520 | ad69471c | pbrook | val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; |
521 | ad69471c | pbrook | val >>= -shift; |
522 | ad69471c | pbrook | } else {
|
523 | ad69471c | pbrook | val <<= shift; |
524 | ad69471c | pbrook | } |
525 | ad69471c | pbrook | return val;
|
526 | ad69471c | pbrook | } |
527 | ad69471c | pbrook | |
528 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
529 | ad69471c | pbrook | int8_t tmp; \ |
530 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
531 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8) { \ |
532 | ad69471c | pbrook | if (src1) { \
|
533 | ad69471c | pbrook | SET_QC(); \ |
534 | ad69471c | pbrook | dest = ~0; \
|
535 | ad69471c | pbrook | } else { \
|
536 | ad69471c | pbrook | dest = 0; \
|
537 | ad69471c | pbrook | } \ |
538 | ad69471c | pbrook | } else if (tmp <= -sizeof(src1) * 8) { \ |
539 | ad69471c | pbrook | dest = 0; \
|
540 | ad69471c | pbrook | } else if (tmp < 0) { \ |
541 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
542 | ad69471c | pbrook | } else { \
|
543 | ad69471c | pbrook | dest = src1 << tmp; \ |
544 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
545 | ad69471c | pbrook | SET_QC(); \ |
546 | ad69471c | pbrook | dest = ~0; \
|
547 | ad69471c | pbrook | } \ |
548 | ad69471c | pbrook | }} while (0) |
549 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u8, neon_u8, 4)
|
550 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u16, neon_u16, 2)
|
551 | ad69471c | pbrook | NEON_VOP_ENV(qshl_u32, neon_u32, 1)
|
552 | ad69471c | pbrook | #undef NEON_FN
|
553 | ad69471c | pbrook | |
554 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
555 | ad69471c | pbrook | { |
556 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
557 | ad69471c | pbrook | if (shift >= 64) { |
558 | ad69471c | pbrook | if (val) {
|
559 | ad69471c | pbrook | val = ~(uint64_t)0;
|
560 | ad69471c | pbrook | SET_QC(); |
561 | ad69471c | pbrook | } else {
|
562 | ad69471c | pbrook | val = 0;
|
563 | ad69471c | pbrook | } |
564 | ad69471c | pbrook | } else if (shift <= -64) { |
565 | ad69471c | pbrook | val = 0;
|
566 | ad69471c | pbrook | } else if (shift < 0) { |
567 | ad69471c | pbrook | val >>= -shift; |
568 | ad69471c | pbrook | } else {
|
569 | ad69471c | pbrook | uint64_t tmp = val; |
570 | ad69471c | pbrook | val <<= shift; |
571 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
572 | ad69471c | pbrook | SET_QC(); |
573 | ad69471c | pbrook | val = ~(uint64_t)0;
|
574 | ad69471c | pbrook | } |
575 | ad69471c | pbrook | } |
576 | ad69471c | pbrook | return val;
|
577 | ad69471c | pbrook | } |
578 | ad69471c | pbrook | |
579 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
580 | ad69471c | pbrook | int8_t tmp; \ |
581 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
582 | ad69471c | pbrook | if (tmp >= sizeof(src1) * 8) { \ |
583 | ad69471c | pbrook | if (src1) \
|
584 | ad69471c | pbrook | SET_QC(); \ |
585 | ad69471c | pbrook | dest = src1 >> 31; \
|
586 | ad69471c | pbrook | } else if (tmp <= -sizeof(src1) * 8) { \ |
587 | ad69471c | pbrook | dest = src1 >> 31; \
|
588 | ad69471c | pbrook | } else if (tmp < 0) { \ |
589 | ad69471c | pbrook | dest = src1 >> -tmp; \ |
590 | ad69471c | pbrook | } else { \
|
591 | ad69471c | pbrook | dest = src1 << tmp; \ |
592 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
593 | ad69471c | pbrook | SET_QC(); \ |
594 | ad69471c | pbrook | dest = src2 >> 31; \
|
595 | ad69471c | pbrook | } \ |
596 | ad69471c | pbrook | }} while (0) |
597 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s8, neon_s8, 4)
|
598 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s16, neon_s16, 2)
|
599 | ad69471c | pbrook | NEON_VOP_ENV(qshl_s32, neon_s32, 1)
|
600 | ad69471c | pbrook | #undef NEON_FN
|
601 | ad69471c | pbrook | |
602 | ad69471c | pbrook | uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
603 | ad69471c | pbrook | { |
604 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
605 | ad69471c | pbrook | int64_t val = valop; |
606 | ad69471c | pbrook | if (shift >= 64) { |
607 | ad69471c | pbrook | if (val) {
|
608 | ad69471c | pbrook | SET_QC(); |
609 | ad69471c | pbrook | val = (val >> 63) & ~SIGNBIT64;
|
610 | ad69471c | pbrook | } |
611 | ad69471c | pbrook | } else if (shift <= 64) { |
612 | ad69471c | pbrook | val >>= 63;
|
613 | ad69471c | pbrook | } else if (shift < 0) { |
614 | ad69471c | pbrook | val >>= -shift; |
615 | ad69471c | pbrook | } else {
|
616 | ad69471c | pbrook | int64_t tmp = val; |
617 | ad69471c | pbrook | val <<= shift; |
618 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
619 | ad69471c | pbrook | SET_QC(); |
620 | ad69471c | pbrook | val = (tmp >> 63) ^ ~SIGNBIT64;
|
621 | ad69471c | pbrook | } |
622 | ad69471c | pbrook | } |
623 | ad69471c | pbrook | return val;
|
624 | ad69471c | pbrook | } |
625 | ad69471c | pbrook | |
626 | ad69471c | pbrook | |
627 | ad69471c | pbrook | /* FIXME: This is wrong. */
|
628 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
629 | ad69471c | pbrook | int8_t tmp; \ |
630 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
631 | ad69471c | pbrook | if (tmp < 0) { \ |
632 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
633 | ad69471c | pbrook | } else { \
|
634 | ad69471c | pbrook | dest = src1 << tmp; \ |
635 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
636 | ad69471c | pbrook | SET_QC(); \ |
637 | ad69471c | pbrook | dest = ~0; \
|
638 | ad69471c | pbrook | } \ |
639 | ad69471c | pbrook | }} while (0) |
640 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
|
641 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
|
642 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
|
643 | ad69471c | pbrook | #undef NEON_FN
|
644 | ad69471c | pbrook | |
645 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) |
646 | ad69471c | pbrook | { |
647 | ad69471c | pbrook | int8_t shift = (int8_t)shiftop; |
648 | ad69471c | pbrook | if (shift < 0) { |
649 | ad69471c | pbrook | val = (val + (1 << (-1 - shift))) >> -shift; |
650 | ad69471c | pbrook | } else { \
|
651 | ad69471c | pbrook | uint64_t tmp = val; |
652 | ad69471c | pbrook | val <<= shift; |
653 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
654 | ad69471c | pbrook | SET_QC(); |
655 | ad69471c | pbrook | val = ~0;
|
656 | ad69471c | pbrook | } |
657 | ad69471c | pbrook | } |
658 | ad69471c | pbrook | return val;
|
659 | ad69471c | pbrook | } |
660 | ad69471c | pbrook | |
661 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) do { \ |
662 | ad69471c | pbrook | int8_t tmp; \ |
663 | ad69471c | pbrook | tmp = (int8_t)src2; \ |
664 | ad69471c | pbrook | if (tmp < 0) { \ |
665 | ad69471c | pbrook | dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ |
666 | ad69471c | pbrook | } else { \
|
667 | ad69471c | pbrook | dest = src1 << tmp; \ |
668 | ad69471c | pbrook | if ((dest >> tmp) != src1) { \
|
669 | ad69471c | pbrook | SET_QC(); \ |
670 | ad69471c | pbrook | dest = src1 >> 31; \
|
671 | ad69471c | pbrook | } \ |
672 | ad69471c | pbrook | }} while (0) |
673 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
|
674 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
|
675 | ad69471c | pbrook | NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
|
676 | ad69471c | pbrook | #undef NEON_FN
|
677 | ad69471c | pbrook | |
678 | ad69471c | pbrook | uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) |
679 | ad69471c | pbrook | { |
680 | ad69471c | pbrook | int8_t shift = (uint8_t)shiftop; |
681 | ad69471c | pbrook | int64_t val = valop; |
682 | ad69471c | pbrook | |
683 | ad69471c | pbrook | if (shift < 0) { |
684 | ad69471c | pbrook | val = (val + (1 << (-1 - shift))) >> -shift; |
685 | ad69471c | pbrook | } else {
|
686 | ad69471c | pbrook | int64_t tmp = val;; |
687 | ad69471c | pbrook | val <<= shift; |
688 | ad69471c | pbrook | if ((val >> shift) != tmp) {
|
689 | ad69471c | pbrook | SET_QC(); |
690 | ad69471c | pbrook | val = tmp >> 31;
|
691 | ad69471c | pbrook | } |
692 | ad69471c | pbrook | } |
693 | ad69471c | pbrook | return val;
|
694 | ad69471c | pbrook | } |
695 | ad69471c | pbrook | |
696 | ad69471c | pbrook | uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |
697 | ad69471c | pbrook | { |
698 | ad69471c | pbrook | uint32_t mask; |
699 | ad69471c | pbrook | mask = (a ^ b) & 0x80808080u;
|
700 | ad69471c | pbrook | a &= ~0x80808080u;
|
701 | ad69471c | pbrook | b &= ~0x80808080u;
|
702 | ad69471c | pbrook | return (a + b) ^ mask;
|
703 | ad69471c | pbrook | } |
704 | ad69471c | pbrook | |
705 | ad69471c | pbrook | uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) |
706 | ad69471c | pbrook | { |
707 | ad69471c | pbrook | uint32_t mask; |
708 | ad69471c | pbrook | mask = (a ^ b) & 0x80008000u;
|
709 | ad69471c | pbrook | a &= ~0x80008000u;
|
710 | ad69471c | pbrook | b &= ~0x80008000u;
|
711 | ad69471c | pbrook | return (a + b) ^ mask;
|
712 | ad69471c | pbrook | } |
713 | ad69471c | pbrook | |
714 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 + src2
|
715 | ad69471c | pbrook | NEON_POP(padd_u8, neon_u8, 4)
|
716 | ad69471c | pbrook | NEON_POP(padd_u16, neon_u16, 2)
|
717 | ad69471c | pbrook | #undef NEON_FN
|
718 | ad69471c | pbrook | |
719 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 - src2
|
720 | ad69471c | pbrook | NEON_VOP(sub_u8, neon_u8, 4)
|
721 | ad69471c | pbrook | NEON_VOP(sub_u16, neon_u16, 2)
|
722 | ad69471c | pbrook | #undef NEON_FN
|
723 | ad69471c | pbrook | |
724 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = src1 * src2
|
725 | ad69471c | pbrook | NEON_VOP(mul_u8, neon_u8, 4)
|
726 | ad69471c | pbrook | NEON_VOP(mul_u16, neon_u16, 2)
|
727 | ad69471c | pbrook | #undef NEON_FN
|
728 | ad69471c | pbrook | |
729 | 1654b2d6 | aurel32 | /* Polynomial multiplication is like integer multiplication except the
|
730 | ad69471c | pbrook | partial products are XORed, not added. */
|
731 | ad69471c | pbrook | uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) |
732 | ad69471c | pbrook | { |
733 | ad69471c | pbrook | uint32_t mask; |
734 | ad69471c | pbrook | uint32_t result; |
735 | ad69471c | pbrook | result = 0;
|
736 | ad69471c | pbrook | while (op1) {
|
737 | ad69471c | pbrook | mask = 0;
|
738 | ad69471c | pbrook | if (op1 & 1) |
739 | ad69471c | pbrook | mask |= 0xff;
|
740 | ad69471c | pbrook | if (op1 & (1 << 8)) |
741 | ad69471c | pbrook | mask |= (0xff << 8); |
742 | ad69471c | pbrook | if (op1 & (1 << 16)) |
743 | ad69471c | pbrook | mask |= (0xff << 16); |
744 | ad69471c | pbrook | if (op1 & (1 << 24)) |
745 | ad69471c | pbrook | mask |= (0xff << 24); |
746 | ad69471c | pbrook | result ^= op2 & mask; |
747 | ad69471c | pbrook | op1 = (op1 >> 1) & 0x7f7f7f7f; |
748 | ad69471c | pbrook | op2 = (op2 << 1) & 0xfefefefe; |
749 | ad69471c | pbrook | } |
750 | ad69471c | pbrook | return result;
|
751 | ad69471c | pbrook | } |
752 | ad69471c | pbrook | |
753 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 |
754 | ad69471c | pbrook | NEON_VOP(tst_u8, neon_u8, 4)
|
755 | ad69471c | pbrook | NEON_VOP(tst_u16, neon_u16, 2)
|
756 | ad69471c | pbrook | NEON_VOP(tst_u32, neon_u32, 1)
|
757 | ad69471c | pbrook | #undef NEON_FN
|
758 | ad69471c | pbrook | |
759 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 |
760 | ad69471c | pbrook | NEON_VOP(ceq_u8, neon_u8, 4)
|
761 | ad69471c | pbrook | NEON_VOP(ceq_u16, neon_u16, 2)
|
762 | ad69471c | pbrook | NEON_VOP(ceq_u32, neon_u32, 1)
|
763 | ad69471c | pbrook | #undef NEON_FN
|
764 | ad69471c | pbrook | |
765 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src |
766 | ad69471c | pbrook | NEON_VOP1(abs_s8, neon_s8, 4)
|
767 | ad69471c | pbrook | NEON_VOP1(abs_s16, neon_s16, 2)
|
768 | ad69471c | pbrook | #undef NEON_FN
|
769 | ad69471c | pbrook | |
770 | ad69471c | pbrook | /* Count Leading Sign/Zero Bits. */
|
/* Return the number of leading zero bits in an 8-bit value (8 for zero).  */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    /* Each significant bit shifted out removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
778 | ad69471c | pbrook | |
/* Return the number of leading zero bits in a 16-bit value (16 for zero).  */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    /* Each significant bit shifted out removes one leading zero.  */
    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
786 | ad69471c | pbrook | |
787 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8(src)
|
788 | ad69471c | pbrook | NEON_VOP1(clz_u8, neon_u8, 4)
|
789 | ad69471c | pbrook | #undef NEON_FN
|
790 | ad69471c | pbrook | |
791 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16(src)
|
792 | ad69471c | pbrook | NEON_VOP1(clz_u16, neon_u16, 2)
|
793 | ad69471c | pbrook | #undef NEON_FN
|
794 | ad69471c | pbrook | |
795 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 |
796 | ad69471c | pbrook | NEON_VOP1(cls_s8, neon_s8, 4)
|
797 | ad69471c | pbrook | #undef NEON_FN
|
798 | ad69471c | pbrook | |
799 | ad69471c | pbrook | #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 |
800 | ad69471c | pbrook | NEON_VOP1(cls_s16, neon_s16, 2)
|
801 | ad69471c | pbrook | #undef NEON_FN
|
802 | ad69471c | pbrook | |
803 | ad69471c | pbrook | uint32_t HELPER(neon_cls_s32)(uint32_t x) |
804 | ad69471c | pbrook | { |
805 | ad69471c | pbrook | int count;
|
806 | ad69471c | pbrook | if ((int32_t)x < 0) |
807 | ad69471c | pbrook | x = ~x; |
808 | ad69471c | pbrook | for (count = 32; x; count--) |
809 | ad69471c | pbrook | x = x >> 1;
|
810 | ad69471c | pbrook | return count - 1; |
811 | ad69471c | pbrook | } |
812 | ad69471c | pbrook | |
813 | ad69471c | pbrook | /* Bit count. */
|
814 | ad69471c | pbrook | uint32_t HELPER(neon_cnt_u8)(uint32_t x) |
815 | ad69471c | pbrook | { |
816 | ad69471c | pbrook | x = (x & 0x55555555) + ((x >> 1) & 0x55555555); |
817 | ad69471c | pbrook | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
818 | ad69471c | pbrook | x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); |
819 | ad69471c | pbrook | return x;
|
820 | ad69471c | pbrook | } |
821 | ad69471c | pbrook | |
822 | ad69471c | pbrook | #define NEON_QDMULH16(dest, src1, src2, round) do { \ |
823 | ad69471c | pbrook | uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ |
824 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ |
825 | ad69471c | pbrook | SET_QC(); \ |
826 | ad69471c | pbrook | tmp = (tmp >> 31) ^ ~SIGNBIT; \
|
827 | ad69471c | pbrook | } \ |
828 | ad69471c | pbrook | tmp <<= 1; \
|
829 | ad69471c | pbrook | if (round) { \
|
830 | ad69471c | pbrook | int32_t old = tmp; \ |
831 | ad69471c | pbrook | tmp += 1 << 15; \ |
832 | ad69471c | pbrook | if ((int32_t)tmp < old) { \
|
833 | ad69471c | pbrook | SET_QC(); \ |
834 | ad69471c | pbrook | tmp = SIGNBIT - 1; \
|
835 | ad69471c | pbrook | } \ |
836 | ad69471c | pbrook | } \ |
837 | ad69471c | pbrook | dest = tmp >> 16; \
|
838 | ad69471c | pbrook | } while(0) |
839 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) |
840 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
|
841 | ad69471c | pbrook | #undef NEON_FN
|
842 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) |
843 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
|
844 | ad69471c | pbrook | #undef NEON_FN
|
845 | ad69471c | pbrook | #undef NEON_QDMULH16
|
846 | ad69471c | pbrook | |
847 | ad69471c | pbrook | #define NEON_QDMULH32(dest, src1, src2, round) do { \ |
848 | ad69471c | pbrook | uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ |
849 | ad69471c | pbrook | if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ |
850 | ad69471c | pbrook | SET_QC(); \ |
851 | ad69471c | pbrook | tmp = (tmp >> 63) ^ ~SIGNBIT64; \
|
852 | ad69471c | pbrook | } else { \
|
853 | ad69471c | pbrook | tmp <<= 1; \
|
854 | ad69471c | pbrook | } \ |
855 | ad69471c | pbrook | if (round) { \
|
856 | ad69471c | pbrook | int64_t old = tmp; \ |
857 | ad69471c | pbrook | tmp += (int64_t)1 << 31; \ |
858 | ad69471c | pbrook | if ((int64_t)tmp < old) { \
|
859 | ad69471c | pbrook | SET_QC(); \ |
860 | ad69471c | pbrook | tmp = SIGNBIT64 - 1; \
|
861 | ad69471c | pbrook | } \ |
862 | ad69471c | pbrook | } \ |
863 | ad69471c | pbrook | dest = tmp >> 32; \
|
864 | ad69471c | pbrook | } while(0) |
865 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) |
866 | ad69471c | pbrook | NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
|
867 | ad69471c | pbrook | #undef NEON_FN
|
868 | ad69471c | pbrook | #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) |
869 | ad69471c | pbrook | NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
|
870 | ad69471c | pbrook | #undef NEON_FN
|
871 | ad69471c | pbrook | #undef NEON_QDMULH32
|
872 | ad69471c | pbrook | |
873 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u8)(uint64_t x) |
874 | ad69471c | pbrook | { |
875 | ad69471c | pbrook | return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) |
876 | ad69471c | pbrook | | ((x >> 24) & 0xff000000u); |
877 | ad69471c | pbrook | } |
878 | ad69471c | pbrook | |
879 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_u16)(uint64_t x) |
880 | ad69471c | pbrook | { |
881 | ad69471c | pbrook | return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); |
882 | ad69471c | pbrook | } |
883 | ad69471c | pbrook | |
884 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) |
885 | ad69471c | pbrook | { |
886 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
887 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
888 | ad69471c | pbrook | } |
889 | ad69471c | pbrook | |
890 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) |
891 | ad69471c | pbrook | { |
892 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
893 | ad69471c | pbrook | } |
894 | ad69471c | pbrook | |
895 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) |
896 | ad69471c | pbrook | { |
897 | ad69471c | pbrook | x &= 0xff80ff80ff80ff80ull;
|
898 | ad69471c | pbrook | x += 0x0080008000800080ull;
|
899 | ad69471c | pbrook | return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) |
900 | ad69471c | pbrook | | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); |
901 | ad69471c | pbrook | } |
902 | ad69471c | pbrook | |
903 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) |
904 | ad69471c | pbrook | { |
905 | ad69471c | pbrook | x &= 0xffff8000ffff8000ull;
|
906 | ad69471c | pbrook | x += 0x0000800000008000ull;
|
907 | ad69471c | pbrook | return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); |
908 | ad69471c | pbrook | } |
909 | ad69471c | pbrook | |
910 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) |
911 | ad69471c | pbrook | { |
912 | ad69471c | pbrook | uint16_t s; |
913 | ad69471c | pbrook | uint8_t d; |
914 | ad69471c | pbrook | uint32_t res = 0;
|
915 | ad69471c | pbrook | #define SAT8(n) \
|
916 | ad69471c | pbrook | s = x >> n; \ |
917 | ad69471c | pbrook | if (s > 0xff) { \ |
918 | ad69471c | pbrook | d = 0xff; \
|
919 | ad69471c | pbrook | SET_QC(); \ |
920 | ad69471c | pbrook | } else { \
|
921 | ad69471c | pbrook | d = s; \ |
922 | ad69471c | pbrook | } \ |
923 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
924 | ad69471c | pbrook | |
925 | ad69471c | pbrook | SAT8(0);
|
926 | ad69471c | pbrook | SAT8(16);
|
927 | ad69471c | pbrook | SAT8(32);
|
928 | ad69471c | pbrook | SAT8(48);
|
929 | ad69471c | pbrook | #undef SAT8
|
930 | ad69471c | pbrook | return res;
|
931 | ad69471c | pbrook | } |
932 | ad69471c | pbrook | |
933 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) |
934 | ad69471c | pbrook | { |
935 | ad69471c | pbrook | int16_t s; |
936 | ad69471c | pbrook | uint8_t d; |
937 | ad69471c | pbrook | uint32_t res = 0;
|
938 | ad69471c | pbrook | #define SAT8(n) \
|
939 | ad69471c | pbrook | s = x >> n; \ |
940 | ad69471c | pbrook | if (s != (int8_t)s) { \
|
941 | ad69471c | pbrook | d = (s >> 15) ^ 0x7f; \ |
942 | ad69471c | pbrook | SET_QC(); \ |
943 | ad69471c | pbrook | } else { \
|
944 | ad69471c | pbrook | d = s; \ |
945 | ad69471c | pbrook | } \ |
946 | ad69471c | pbrook | res |= (uint32_t)d << (n / 2);
|
947 | ad69471c | pbrook | |
948 | ad69471c | pbrook | SAT8(0);
|
949 | ad69471c | pbrook | SAT8(16);
|
950 | ad69471c | pbrook | SAT8(32);
|
951 | ad69471c | pbrook | SAT8(48);
|
952 | ad69471c | pbrook | #undef SAT8
|
953 | ad69471c | pbrook | return res;
|
954 | ad69471c | pbrook | } |
955 | ad69471c | pbrook | |
956 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) |
957 | ad69471c | pbrook | { |
958 | ad69471c | pbrook | uint32_t high; |
959 | ad69471c | pbrook | uint32_t low; |
960 | ad69471c | pbrook | low = x; |
961 | ad69471c | pbrook | if (low > 0xffff) { |
962 | ad69471c | pbrook | low = 0xffff;
|
963 | ad69471c | pbrook | SET_QC(); |
964 | ad69471c | pbrook | } |
965 | ad69471c | pbrook | high = x >> 32;
|
966 | ad69471c | pbrook | if (high > 0xffff) { |
967 | ad69471c | pbrook | high = 0xffff;
|
968 | ad69471c | pbrook | SET_QC(); |
969 | ad69471c | pbrook | } |
970 | ad69471c | pbrook | return low | (high << 16); |
971 | ad69471c | pbrook | } |
972 | ad69471c | pbrook | |
973 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) |
974 | ad69471c | pbrook | { |
975 | ad69471c | pbrook | int32_t low; |
976 | ad69471c | pbrook | int32_t high; |
977 | ad69471c | pbrook | low = x; |
978 | ad69471c | pbrook | if (low != (int16_t)low) {
|
979 | ad69471c | pbrook | low = (low >> 31) ^ 0x7fff; |
980 | ad69471c | pbrook | SET_QC(); |
981 | ad69471c | pbrook | } |
982 | ad69471c | pbrook | high = x >> 32;
|
983 | ad69471c | pbrook | if (high != (int16_t)high) {
|
984 | ad69471c | pbrook | high = (high >> 31) ^ 0x7fff; |
985 | ad69471c | pbrook | SET_QC(); |
986 | ad69471c | pbrook | } |
987 | ad69471c | pbrook | return (uint16_t)low | (high << 16); |
988 | ad69471c | pbrook | } |
989 | ad69471c | pbrook | |
990 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) |
991 | ad69471c | pbrook | { |
992 | ad69471c | pbrook | if (x > 0xffffffffu) { |
993 | ad69471c | pbrook | SET_QC(); |
994 | ad69471c | pbrook | return 0xffffffffu; |
995 | ad69471c | pbrook | } |
996 | ad69471c | pbrook | return x;
|
997 | ad69471c | pbrook | } |
998 | ad69471c | pbrook | |
999 | ad69471c | pbrook | uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) |
1000 | ad69471c | pbrook | { |
1001 | ad69471c | pbrook | if ((int64_t)x != (int32_t)x) {
|
1002 | ad69471c | pbrook | SET_QC(); |
1003 | ad69471c | pbrook | return (x >> 63) ^ 0x7fffffff; |
1004 | ad69471c | pbrook | } |
1005 | ad69471c | pbrook | return x;
|
1006 | ad69471c | pbrook | } |
1007 | ad69471c | pbrook | |
1008 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u8)(uint32_t x) |
1009 | ad69471c | pbrook | { |
1010 | ad69471c | pbrook | uint64_t tmp; |
1011 | ad69471c | pbrook | uint64_t ret; |
1012 | ad69471c | pbrook | ret = (uint8_t)x; |
1013 | ad69471c | pbrook | tmp = (uint8_t)(x >> 8);
|
1014 | ad69471c | pbrook | ret |= tmp << 16;
|
1015 | ad69471c | pbrook | tmp = (uint8_t)(x >> 16);
|
1016 | ad69471c | pbrook | ret |= tmp << 32;
|
1017 | ad69471c | pbrook | tmp = (uint8_t)(x >> 24);
|
1018 | ad69471c | pbrook | ret |= tmp << 48;
|
1019 | ad69471c | pbrook | return ret;
|
1020 | ad69471c | pbrook | } |
1021 | ad69471c | pbrook | |
1022 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s8)(uint32_t x) |
1023 | ad69471c | pbrook | { |
1024 | ad69471c | pbrook | uint64_t tmp; |
1025 | ad69471c | pbrook | uint64_t ret; |
1026 | ad69471c | pbrook | ret = (uint16_t)(int8_t)x; |
1027 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 8);
|
1028 | ad69471c | pbrook | ret |= tmp << 16;
|
1029 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 16);
|
1030 | ad69471c | pbrook | ret |= tmp << 32;
|
1031 | ad69471c | pbrook | tmp = (uint16_t)(int8_t)(x >> 24);
|
1032 | ad69471c | pbrook | ret |= tmp << 48;
|
1033 | ad69471c | pbrook | return ret;
|
1034 | ad69471c | pbrook | } |
1035 | ad69471c | pbrook | |
1036 | ad69471c | pbrook | uint64_t HELPER(neon_widen_u16)(uint32_t x) |
1037 | ad69471c | pbrook | { |
1038 | ad69471c | pbrook | uint64_t high = (uint16_t)(x >> 16);
|
1039 | ad69471c | pbrook | return ((uint16_t)x) | (high << 32); |
1040 | ad69471c | pbrook | } |
1041 | ad69471c | pbrook | |
1042 | ad69471c | pbrook | uint64_t HELPER(neon_widen_s16)(uint32_t x) |
1043 | ad69471c | pbrook | { |
1044 | ad69471c | pbrook | uint64_t high = (int16_t)(x >> 16);
|
1045 | ad69471c | pbrook | return ((uint32_t)(int16_t)x) | (high << 32); |
1046 | ad69471c | pbrook | } |
1047 | ad69471c | pbrook | |
1048 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) |
1049 | ad69471c | pbrook | { |
1050 | ad69471c | pbrook | uint64_t mask; |
1051 | ad69471c | pbrook | mask = (a ^ b) & 0x8000800080008000ull;
|
1052 | ad69471c | pbrook | a &= ~0x8000800080008000ull;
|
1053 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1054 | ad69471c | pbrook | return (a + b) ^ mask;
|
1055 | ad69471c | pbrook | } |
1056 | ad69471c | pbrook | |
1057 | ad69471c | pbrook | uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) |
1058 | ad69471c | pbrook | { |
1059 | ad69471c | pbrook | uint64_t mask; |
1060 | ad69471c | pbrook | mask = (a ^ b) & 0x8000000080000000ull;
|
1061 | ad69471c | pbrook | a &= ~0x8000000080000000ull;
|
1062 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1063 | ad69471c | pbrook | return (a + b) ^ mask;
|
1064 | ad69471c | pbrook | } |
1065 | ad69471c | pbrook | |
1066 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) |
1067 | ad69471c | pbrook | { |
1068 | ad69471c | pbrook | uint64_t tmp; |
1069 | ad69471c | pbrook | uint64_t tmp2; |
1070 | ad69471c | pbrook | |
1071 | ad69471c | pbrook | tmp = a & 0x0000ffff0000ffffull;
|
1072 | ad69471c | pbrook | tmp += (a >> 16) & 0x0000ffff0000ffffull; |
1073 | ad69471c | pbrook | tmp2 = b & 0xffff0000ffff0000ull;
|
1074 | ad69471c | pbrook | tmp2 += (b << 16) & 0xffff0000ffff0000ull; |
1075 | ad69471c | pbrook | return ( tmp & 0xffff) |
1076 | ad69471c | pbrook | | ((tmp >> 16) & 0xffff0000ull) |
1077 | ad69471c | pbrook | | ((tmp2 << 16) & 0xffff00000000ull) |
1078 | ad69471c | pbrook | | ( tmp2 & 0xffff000000000000ull);
|
1079 | ad69471c | pbrook | } |
1080 | ad69471c | pbrook | |
1081 | ad69471c | pbrook | uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) |
1082 | ad69471c | pbrook | { |
1083 | ad69471c | pbrook | uint32_t low = a + (a >> 32);
|
1084 | ad69471c | pbrook | uint32_t high = b + (b >> 32);
|
1085 | ad69471c | pbrook | return low + ((uint64_t)high << 32); |
1086 | ad69471c | pbrook | } |
1087 | ad69471c | pbrook | |
1088 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) |
1089 | ad69471c | pbrook | { |
1090 | ad69471c | pbrook | uint64_t mask; |
1091 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000800080008000ull;
|
1092 | ad69471c | pbrook | a |= 0x8000800080008000ull;
|
1093 | ad69471c | pbrook | b &= ~0x8000800080008000ull;
|
1094 | ad69471c | pbrook | return (a - b) ^ mask;
|
1095 | ad69471c | pbrook | } |
1096 | ad69471c | pbrook | |
1097 | ad69471c | pbrook | uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) |
1098 | ad69471c | pbrook | { |
1099 | ad69471c | pbrook | uint64_t mask; |
1100 | ad69471c | pbrook | mask = (a ^ ~b) & 0x8000000080000000ull;
|
1101 | ad69471c | pbrook | a |= 0x8000000080000000ull;
|
1102 | ad69471c | pbrook | b &= ~0x8000000080000000ull;
|
1103 | ad69471c | pbrook | return (a - b) ^ mask;
|
1104 | ad69471c | pbrook | } |
1105 | ad69471c | pbrook | |
1106 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) |
1107 | ad69471c | pbrook | { |
1108 | ad69471c | pbrook | uint32_t x, y; |
1109 | ad69471c | pbrook | uint32_t low, high; |
1110 | ad69471c | pbrook | |
1111 | ad69471c | pbrook | x = a; |
1112 | ad69471c | pbrook | y = b; |
1113 | ad69471c | pbrook | low = x + y; |
1114 | ad69471c | pbrook | if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1115 | ad69471c | pbrook | SET_QC(); |
1116 | ad69471c | pbrook | low = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1117 | ad69471c | pbrook | } |
1118 | ad69471c | pbrook | x = a >> 32;
|
1119 | ad69471c | pbrook | y = b >> 32;
|
1120 | ad69471c | pbrook | high = x + y; |
1121 | ad69471c | pbrook | if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) {
|
1122 | ad69471c | pbrook | SET_QC(); |
1123 | ad69471c | pbrook | high = ((int32_t)x >> 31) ^ ~SIGNBIT;
|
1124 | ad69471c | pbrook | } |
1125 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1126 | ad69471c | pbrook | } |
1127 | ad69471c | pbrook | |
1128 | ad69471c | pbrook | uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) |
1129 | ad69471c | pbrook | { |
1130 | ad69471c | pbrook | uint64_t result; |
1131 | ad69471c | pbrook | |
1132 | ad69471c | pbrook | result = a + b; |
1133 | ad69471c | pbrook | if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) {
|
1134 | ad69471c | pbrook | SET_QC(); |
1135 | ad69471c | pbrook | result = ((int64_t)a >> 63) ^ ~SIGNBIT64;
|
1136 | ad69471c | pbrook | } |
1137 | ad69471c | pbrook | return result;
|
1138 | ad69471c | pbrook | } |
1139 | ad69471c | pbrook | |
/* Store in dest the absolute difference of x and y after both have been
   converted to type.  */
#define DO_ABD(dest, x, y, type) do { \
    type abd_lhs = x; \
    type abd_rhs = y; \
    dest = ((abd_rhs < abd_lhs) ? abd_lhs - abd_rhs : abd_rhs - abd_lhs); \
    } while(0)
1145 | ad69471c | pbrook | |
1146 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) |
1147 | ad69471c | pbrook | { |
1148 | ad69471c | pbrook | uint64_t tmp; |
1149 | ad69471c | pbrook | uint64_t result; |
1150 | ad69471c | pbrook | DO_ABD(result, a, b, uint8_t); |
1151 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, uint8_t); |
1152 | ad69471c | pbrook | result |= tmp << 16;
|
1153 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint8_t); |
1154 | ad69471c | pbrook | result |= tmp << 32;
|
1155 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, uint8_t); |
1156 | ad69471c | pbrook | result |= tmp << 48;
|
1157 | ad69471c | pbrook | return result;
|
1158 | ad69471c | pbrook | } |
1159 | ad69471c | pbrook | |
1160 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) |
1161 | ad69471c | pbrook | { |
1162 | ad69471c | pbrook | uint64_t tmp; |
1163 | ad69471c | pbrook | uint64_t result; |
1164 | ad69471c | pbrook | DO_ABD(result, a, b, int8_t); |
1165 | ad69471c | pbrook | DO_ABD(tmp, a >> 8, b >> 8, int8_t); |
1166 | ad69471c | pbrook | result |= tmp << 16;
|
1167 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int8_t); |
1168 | ad69471c | pbrook | result |= tmp << 32;
|
1169 | ad69471c | pbrook | DO_ABD(tmp, a >> 24, b >> 24, int8_t); |
1170 | ad69471c | pbrook | result |= tmp << 48;
|
1171 | ad69471c | pbrook | return result;
|
1172 | ad69471c | pbrook | } |
1173 | ad69471c | pbrook | |
1174 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) |
1175 | ad69471c | pbrook | { |
1176 | ad69471c | pbrook | uint64_t tmp; |
1177 | ad69471c | pbrook | uint64_t result; |
1178 | ad69471c | pbrook | DO_ABD(result, a, b, uint16_t); |
1179 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, uint16_t); |
1180 | ad69471c | pbrook | return result | (tmp << 32); |
1181 | ad69471c | pbrook | } |
1182 | ad69471c | pbrook | |
1183 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) |
1184 | ad69471c | pbrook | { |
1185 | ad69471c | pbrook | uint64_t tmp; |
1186 | ad69471c | pbrook | uint64_t result; |
1187 | ad69471c | pbrook | DO_ABD(result, a, b, int16_t); |
1188 | ad69471c | pbrook | DO_ABD(tmp, a >> 16, b >> 16, int16_t); |
1189 | ad69471c | pbrook | return result | (tmp << 32); |
1190 | ad69471c | pbrook | } |
1191 | ad69471c | pbrook | |
1192 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) |
1193 | ad69471c | pbrook | { |
1194 | ad69471c | pbrook | uint64_t result; |
1195 | ad69471c | pbrook | DO_ABD(result, a, b, uint32_t); |
1196 | ad69471c | pbrook | return result;
|
1197 | ad69471c | pbrook | } |
1198 | ad69471c | pbrook | |
1199 | ad69471c | pbrook | uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) |
1200 | ad69471c | pbrook | { |
1201 | ad69471c | pbrook | uint64_t result; |
1202 | ad69471c | pbrook | DO_ABD(result, a, b, int32_t); |
1203 | ad69471c | pbrook | return result;
|
1204 | ad69471c | pbrook | } |
1205 | ad69471c | pbrook | #undef DO_ABD
|
1206 | ad69471c | pbrook | |
1207 | ad69471c | pbrook | /* Widening multiply. Named type is the source type. */
|
/* Store in dest the product of x and y: both are first converted to type1
   (the source lane type), then multiplied as type2 (the result lane
   type).  */
#define DO_MULL(dest, x, y, type1, type2) do { \
    type1 mull_lhs = x; \
    type1 mull_rhs = y; \
    dest = (type2)((type2)mull_lhs * (type2)mull_rhs); \
    } while(0)
1213 | ad69471c | pbrook | |
1214 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) |
1215 | ad69471c | pbrook | { |
1216 | ad69471c | pbrook | uint64_t tmp; |
1217 | ad69471c | pbrook | uint64_t result; |
1218 | ad69471c | pbrook | |
1219 | ad69471c | pbrook | DO_MULL(result, a, b, uint8_t, uint16_t); |
1220 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); |
1221 | ad69471c | pbrook | result |= tmp << 16;
|
1222 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); |
1223 | ad69471c | pbrook | result |= tmp << 32;
|
1224 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); |
1225 | ad69471c | pbrook | result |= tmp << 48;
|
1226 | ad69471c | pbrook | return result;
|
1227 | ad69471c | pbrook | } |
1228 | ad69471c | pbrook | |
1229 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) |
1230 | ad69471c | pbrook | { |
1231 | ad69471c | pbrook | uint64_t tmp; |
1232 | ad69471c | pbrook | uint64_t result; |
1233 | ad69471c | pbrook | |
1234 | ad69471c | pbrook | DO_MULL(result, a, b, int8_t, uint16_t); |
1235 | ad69471c | pbrook | DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); |
1236 | ad69471c | pbrook | result |= tmp << 16;
|
1237 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); |
1238 | ad69471c | pbrook | result |= tmp << 32;
|
1239 | ad69471c | pbrook | DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); |
1240 | ad69471c | pbrook | result |= tmp << 48;
|
1241 | ad69471c | pbrook | return result;
|
1242 | ad69471c | pbrook | } |
1243 | ad69471c | pbrook | |
1244 | ad69471c | pbrook | uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) |
1245 | ad69471c | pbrook | { |
1246 | ad69471c | pbrook | uint64_t tmp; |
1247 | ad69471c | pbrook | uint64_t result; |
1248 | ad69471c | pbrook | |
1249 | ad69471c | pbrook | DO_MULL(result, a, b, uint16_t, uint32_t); |
1250 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); |
1251 | ad69471c | pbrook | return result | (tmp << 32); |
1252 | ad69471c | pbrook | } |
1253 | ad69471c | pbrook | |
1254 | ad69471c | pbrook | uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) |
1255 | ad69471c | pbrook | { |
1256 | ad69471c | pbrook | uint64_t tmp; |
1257 | ad69471c | pbrook | uint64_t result; |
1258 | ad69471c | pbrook | |
1259 | ad69471c | pbrook | DO_MULL(result, a, b, int16_t, uint32_t); |
1260 | ad69471c | pbrook | DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); |
1261 | ad69471c | pbrook | return result | (tmp << 32); |
1262 | ad69471c | pbrook | } |
1263 | ad69471c | pbrook | |
1264 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u16)(uint64_t x) |
1265 | ad69471c | pbrook | { |
1266 | ad69471c | pbrook | uint16_t tmp; |
1267 | ad69471c | pbrook | uint64_t result; |
1268 | ad69471c | pbrook | result = (uint16_t)-x; |
1269 | ad69471c | pbrook | tmp = -(x >> 16);
|
1270 | ad69471c | pbrook | result |= (uint64_t)tmp << 16;
|
1271 | ad69471c | pbrook | tmp = -(x >> 32);
|
1272 | ad69471c | pbrook | result |= (uint64_t)tmp << 32;
|
1273 | ad69471c | pbrook | tmp = -(x >> 48);
|
1274 | ad69471c | pbrook | result |= (uint64_t)tmp << 48;
|
1275 | ad69471c | pbrook | return result;
|
1276 | ad69471c | pbrook | } |
1277 | ad69471c | pbrook | |
1278 | ad69471c | pbrook | #include <stdio.h> |
1279 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u32)(uint64_t x) |
1280 | ad69471c | pbrook | { |
1281 | ad69471c | pbrook | uint32_t low = -x; |
1282 | ad69471c | pbrook | uint32_t high = -(x >> 32);
|
1283 | ad69471c | pbrook | return low | ((uint64_t)high << 32); |
1284 | ad69471c | pbrook | } |
1285 | ad69471c | pbrook | |
1286 | ad69471c | pbrook | /* FIXME: There should be a native op for this. */
|
1287 | ad69471c | pbrook | uint64_t HELPER(neon_negl_u64)(uint64_t x) |
1288 | ad69471c | pbrook | { |
1289 | ad69471c | pbrook | return -x;
|
1290 | ad69471c | pbrook | } |
1291 | ad69471c | pbrook | |
1292 | ad69471c | pbrook | /* Saturating sign manipulation. */
|
1293 | ad69471c | pbrook | /* ??? Make these use NEON_VOP1 */
|
1294 | ad69471c | pbrook | #define DO_QABS8(x) do { \ |
1295 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1296 | ad69471c | pbrook | x = 0x7f; \
|
1297 | ad69471c | pbrook | SET_QC(); \ |
1298 | ad69471c | pbrook | } else if (x < 0) { \ |
1299 | ad69471c | pbrook | x = -x; \ |
1300 | ad69471c | pbrook | }} while (0) |
1301 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) |
1302 | ad69471c | pbrook | { |
1303 | ad69471c | pbrook | neon_s8 vec; |
1304 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1305 | ad69471c | pbrook | DO_QABS8(vec.v1); |
1306 | ad69471c | pbrook | DO_QABS8(vec.v2); |
1307 | ad69471c | pbrook | DO_QABS8(vec.v3); |
1308 | ad69471c | pbrook | DO_QABS8(vec.v4); |
1309 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1310 | ad69471c | pbrook | return x;
|
1311 | ad69471c | pbrook | } |
1312 | ad69471c | pbrook | #undef DO_QABS8
|
1313 | ad69471c | pbrook | |
1314 | ad69471c | pbrook | #define DO_QNEG8(x) do { \ |
1315 | ad69471c | pbrook | if (x == (int8_t)0x80) { \ |
1316 | ad69471c | pbrook | x = 0x7f; \
|
1317 | ad69471c | pbrook | SET_QC(); \ |
1318 | ad69471c | pbrook | } else { \
|
1319 | ad69471c | pbrook | x = -x; \ |
1320 | ad69471c | pbrook | }} while (0) |
1321 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) |
1322 | ad69471c | pbrook | { |
1323 | ad69471c | pbrook | neon_s8 vec; |
1324 | ad69471c | pbrook | NEON_UNPACK(neon_s8, vec, x); |
1325 | ad69471c | pbrook | DO_QNEG8(vec.v1); |
1326 | ad69471c | pbrook | DO_QNEG8(vec.v2); |
1327 | ad69471c | pbrook | DO_QNEG8(vec.v3); |
1328 | ad69471c | pbrook | DO_QNEG8(vec.v4); |
1329 | ad69471c | pbrook | NEON_PACK(neon_s8, x, vec); |
1330 | ad69471c | pbrook | return x;
|
1331 | ad69471c | pbrook | } |
1332 | ad69471c | pbrook | #undef DO_QNEG8
|
1333 | ad69471c | pbrook | |
1334 | ad69471c | pbrook | #define DO_QABS16(x) do { \ |
1335 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1336 | ad69471c | pbrook | x = 0x7fff; \
|
1337 | ad69471c | pbrook | SET_QC(); \ |
1338 | ad69471c | pbrook | } else if (x < 0) { \ |
1339 | ad69471c | pbrook | x = -x; \ |
1340 | ad69471c | pbrook | }} while (0) |
1341 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) |
1342 | ad69471c | pbrook | { |
1343 | ad69471c | pbrook | neon_s16 vec; |
1344 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1345 | ad69471c | pbrook | DO_QABS16(vec.v1); |
1346 | ad69471c | pbrook | DO_QABS16(vec.v2); |
1347 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1348 | ad69471c | pbrook | return x;
|
1349 | ad69471c | pbrook | } |
1350 | ad69471c | pbrook | #undef DO_QABS16
|
1351 | ad69471c | pbrook | |
1352 | ad69471c | pbrook | #define DO_QNEG16(x) do { \ |
1353 | ad69471c | pbrook | if (x == (int16_t)0x8000) { \ |
1354 | ad69471c | pbrook | x = 0x7fff; \
|
1355 | ad69471c | pbrook | SET_QC(); \ |
1356 | ad69471c | pbrook | } else { \
|
1357 | ad69471c | pbrook | x = -x; \ |
1358 | ad69471c | pbrook | }} while (0) |
1359 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) |
1360 | ad69471c | pbrook | { |
1361 | ad69471c | pbrook | neon_s16 vec; |
1362 | ad69471c | pbrook | NEON_UNPACK(neon_s16, vec, x); |
1363 | ad69471c | pbrook | DO_QNEG16(vec.v1); |
1364 | ad69471c | pbrook | DO_QNEG16(vec.v2); |
1365 | ad69471c | pbrook | NEON_PACK(neon_s16, x, vec); |
1366 | ad69471c | pbrook | return x;
|
1367 | ad69471c | pbrook | } |
1368 | ad69471c | pbrook | #undef DO_QNEG16
|
1369 | ad69471c | pbrook | |
1370 | ad69471c | pbrook | uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) |
1371 | ad69471c | pbrook | { |
1372 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1373 | ad69471c | pbrook | SET_QC(); |
1374 | ad69471c | pbrook | x = ~SIGNBIT; |
1375 | ad69471c | pbrook | } else if ((int32_t)x < 0) { |
1376 | ad69471c | pbrook | x = -x; |
1377 | ad69471c | pbrook | } |
1378 | ad69471c | pbrook | return x;
|
1379 | ad69471c | pbrook | } |
1380 | ad69471c | pbrook | |
1381 | ad69471c | pbrook | uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) |
1382 | ad69471c | pbrook | { |
1383 | ad69471c | pbrook | if (x == SIGNBIT) {
|
1384 | ad69471c | pbrook | SET_QC(); |
1385 | ad69471c | pbrook | x = ~SIGNBIT; |
1386 | ad69471c | pbrook | } else {
|
1387 | ad69471c | pbrook | x = -x; |
1388 | ad69471c | pbrook | } |
1389 | ad69471c | pbrook | return x;
|
1390 | ad69471c | pbrook | } |
1391 | ad69471c | pbrook | |
1392 | ad69471c | pbrook | /* NEON Float helpers. */
|
1393 | ad69471c | pbrook | uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) |
1394 | ad69471c | pbrook | { |
1395 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1396 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1397 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; |
1398 | ad69471c | pbrook | } |
1399 | ad69471c | pbrook | |
1400 | ad69471c | pbrook | uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) |
1401 | ad69471c | pbrook | { |
1402 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1403 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1404 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; |
1405 | ad69471c | pbrook | } |
1406 | ad69471c | pbrook | |
1407 | ad69471c | pbrook | uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) |
1408 | ad69471c | pbrook | { |
1409 | ad69471c | pbrook | float32 f0 = vfp_itos(a); |
1410 | ad69471c | pbrook | float32 f1 = vfp_itos(b); |
1411 | ad69471c | pbrook | return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) |
1412 | ad69471c | pbrook | ? float32_sub(f0, f1, NFS) |
1413 | ad69471c | pbrook | : float32_sub(f1, f0, NFS)); |
1414 | ad69471c | pbrook | } |
1415 | ad69471c | pbrook | |
1416 | ad69471c | pbrook | uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) |
1417 | ad69471c | pbrook | { |
1418 | ad69471c | pbrook | return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
|
1419 | ad69471c | pbrook | } |
1420 | ad69471c | pbrook | |
1421 | ad69471c | pbrook | uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) |
1422 | ad69471c | pbrook | { |
1423 | ad69471c | pbrook | return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
|
1424 | ad69471c | pbrook | } |
1425 | ad69471c | pbrook | |
1426 | ad69471c | pbrook | uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) |
1427 | ad69471c | pbrook | { |
1428 | ad69471c | pbrook | return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
|
1429 | ad69471c | pbrook | } |
1430 | ad69471c | pbrook | |
1431 | ad69471c | pbrook | /* Floating point comparisons produce an integer result. */
|
1432 | ad69471c | pbrook | #define NEON_VOP_FCMP(name, cmp) \
|
1433 | ad69471c | pbrook | uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ |
1434 | ad69471c | pbrook | { \ |
1435 | ad69471c | pbrook | if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ |
1436 | ad69471c | pbrook | return ~0; \ |
1437 | ad69471c | pbrook | else \
|
1438 | ad69471c | pbrook | return 0; \ |
1439 | ad69471c | pbrook | } |
1440 | ad69471c | pbrook | |
1441 | ad69471c | pbrook | NEON_VOP_FCMP(ceq_f32, ==) |
1442 | ad69471c | pbrook | NEON_VOP_FCMP(cge_f32, >=) |
1443 | ad69471c | pbrook | NEON_VOP_FCMP(cgt_f32, >) |
1444 | ad69471c | pbrook | |
1445 | ad69471c | pbrook | uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) |
1446 | ad69471c | pbrook | { |
1447 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1448 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1449 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; |
1450 | ad69471c | pbrook | } |
1451 | ad69471c | pbrook | |
1452 | ad69471c | pbrook | uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) |
1453 | ad69471c | pbrook | { |
1454 | ad69471c | pbrook | float32 f0 = float32_abs(vfp_itos(a)); |
1455 | ad69471c | pbrook | float32 f1 = float32_abs(vfp_itos(b)); |
1456 | ad69471c | pbrook | return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; |
1457 | ad69471c | pbrook | } |