Revision ad69471c

b/Makefile.target
211 211
endif
212 212

  
213 213
ifeq ($(TARGET_BASE_ARCH), arm)
214
LIBOBJS+= op_helper.o helper.o
214
LIBOBJS+= op_helper.o helper.o neon_helper.o
215 215
endif
216 216

  
217 217
ifeq ($(TARGET_BASE_ARCH), sh4)
b/target-arm/helper.c
256 256
    free(env);
257 257
}
258 258

  
259
/* Polynomial multiplication is like integer multiplication except the
   partial products are XORed, not added.

   op1 and op2 are each treated as four independent 8-bit lanes packed
   into a 32-bit word.  Each iteration consumes the lowest remaining bit
   of every op1 lane: where that bit is set, the current (shifted) op2
   lane is XORed into the matching result lane.  The masks applied after
   each shift keep bits from crossing lane boundaries, so every lane's
   product is truncated to its low 8 bits.  */
uint32_t helper_neon_mul_p8(uint32_t op1, uint32_t op2)
{
    uint32_t mask;
    uint32_t result;

    result = 0;
    while (op1) {
        /* Build a byte mask selecting the lanes whose current multiplier
           bit is set.  Unsigned constants are used so that the shift into
           bit 31 is well defined.  */
        mask = 0;
        if (op1 & 1u)
            mask |= 0xffu;
        if (op1 & (1u << 8))
            mask |= (0xffu << 8);
        if (op1 & (1u << 16))
            mask |= (0xffu << 16);
        if (op1 & (1u << 24))
            mask |= (0xffu << 24);
        result ^= op2 & mask;
        /* Advance to the next multiplier bit in every lane.  */
        op1 = (op1 >> 1) & 0x7f7f7f7fu;
        op2 = (op2 << 1) & 0xfefefefeu;
    }
    return result;
}
282

  
283 259
uint32_t cpsr_read(CPUARMState *env)
284 260
{
285 261
    int ZF;
......
376 352
    return x;
377 353
}
378 354

  
355
uint32_t HELPER(abs)(uint32_t x)
356
{
357
    return ((int32_t)x < 0) ? -x : x;
358
}
359

  
379 360
#if defined(CONFIG_USER_ONLY)
380 361

  
381 362
void do_interrupt (CPUState *env)
b/target-arm/helpers.h
84 84
DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t))
85 85
DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t))
86 86
DEF_HELPER_1_1(rbit, uint32_t, (uint32_t))
87
DEF_HELPER_1_1(abs, uint32_t, (uint32_t))
87 88

  
88 89
#define PAS_OP(pfx)  \
89 90
    DEF_HELPER_1_3(pfx ## add8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \
......
208 209
DEF_HELPER_1_2(recpe_u32, uint32_t, (uint32_t, CPUState *))
209 210
DEF_HELPER_1_2(rsqrte_u32, uint32_t, (uint32_t, CPUState *))
210 211
DEF_HELPER_1_4(neon_tbl, uint32_t, (uint32_t, uint32_t, uint32_t, uint32_t))
212
DEF_HELPER_1_2(neon_add_saturate_u64, uint64_t, (uint64_t, uint64_t))
213
DEF_HELPER_1_2(neon_add_saturate_s64, uint64_t, (uint64_t, uint64_t))
214
DEF_HELPER_1_2(neon_sub_saturate_u64, uint64_t, (uint64_t, uint64_t))
215
DEF_HELPER_1_2(neon_sub_saturate_s64, uint64_t, (uint64_t, uint64_t))
211 216

  
212 217
DEF_HELPER_1_2(add_cc, uint32_t, (uint32_t, uint32_t))
213 218
DEF_HELPER_1_2(adc_cc, uint32_t, (uint32_t, uint32_t))
......
223 228
DEF_HELPER_1_2(sar_cc, uint32_t, (uint32_t, uint32_t))
224 229
DEF_HELPER_1_2(ror_cc, uint32_t, (uint32_t, uint32_t))
225 230

  
231
/* neon_helper.c */
232
DEF_HELPER_1_3(neon_qadd_u8, uint32_t, (CPUState *, uint32_t, uint32_t))
233
DEF_HELPER_1_3(neon_qadd_s8, uint32_t, (CPUState *, uint32_t, uint32_t))
234
DEF_HELPER_1_3(neon_qadd_u16, uint32_t, (CPUState *, uint32_t, uint32_t))
235
DEF_HELPER_1_3(neon_qadd_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
236
DEF_HELPER_1_3(neon_qsub_u8, uint32_t, (CPUState *, uint32_t, uint32_t))
237
DEF_HELPER_1_3(neon_qsub_s8, uint32_t, (CPUState *, uint32_t, uint32_t))
238
DEF_HELPER_1_3(neon_qsub_u16, uint32_t, (CPUState *, uint32_t, uint32_t))
239
DEF_HELPER_1_3(neon_qsub_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
240

  
241
DEF_HELPER_1_2(neon_hadd_s8, uint32_t, (uint32_t, uint32_t))
242
DEF_HELPER_1_2(neon_hadd_u8, uint32_t, (uint32_t, uint32_t))
243
DEF_HELPER_1_2(neon_hadd_s16, uint32_t, (uint32_t, uint32_t))
244
DEF_HELPER_1_2(neon_hadd_u16, uint32_t, (uint32_t, uint32_t))
245
DEF_HELPER_1_2(neon_hadd_s32, int32_t, (int32_t, int32_t))
246
DEF_HELPER_1_2(neon_hadd_u32, uint32_t, (uint32_t, uint32_t))
247
DEF_HELPER_1_2(neon_rhadd_s8, uint32_t, (uint32_t, uint32_t))
248
DEF_HELPER_1_2(neon_rhadd_u8, uint32_t, (uint32_t, uint32_t))
249
DEF_HELPER_1_2(neon_rhadd_s16, uint32_t, (uint32_t, uint32_t))
250
DEF_HELPER_1_2(neon_rhadd_u16, uint32_t, (uint32_t, uint32_t))
251
DEF_HELPER_1_2(neon_rhadd_s32, int32_t, (int32_t, int32_t))
252
DEF_HELPER_1_2(neon_rhadd_u32, uint32_t, (uint32_t, uint32_t))
253
DEF_HELPER_1_2(neon_hsub_s8, uint32_t, (uint32_t, uint32_t))
254
DEF_HELPER_1_2(neon_hsub_u8, uint32_t, (uint32_t, uint32_t))
255
DEF_HELPER_1_2(neon_hsub_s16, uint32_t, (uint32_t, uint32_t))
256
DEF_HELPER_1_2(neon_hsub_u16, uint32_t, (uint32_t, uint32_t))
257
DEF_HELPER_1_2(neon_hsub_s32, int32_t, (int32_t, int32_t))
258
DEF_HELPER_1_2(neon_hsub_u32, uint32_t, (uint32_t, uint32_t))
259

  
260
DEF_HELPER_1_2(neon_cgt_u8, uint32_t, (uint32_t, uint32_t))
261
DEF_HELPER_1_2(neon_cgt_s8, uint32_t, (uint32_t, uint32_t))
262
DEF_HELPER_1_2(neon_cgt_u16, uint32_t, (uint32_t, uint32_t))
263
DEF_HELPER_1_2(neon_cgt_s16, uint32_t, (uint32_t, uint32_t))
264
DEF_HELPER_1_2(neon_cgt_u32, uint32_t, (uint32_t, uint32_t))
265
DEF_HELPER_1_2(neon_cgt_s32, uint32_t, (uint32_t, uint32_t))
266
DEF_HELPER_1_2(neon_cge_u8, uint32_t, (uint32_t, uint32_t))
267
DEF_HELPER_1_2(neon_cge_s8, uint32_t, (uint32_t, uint32_t))
268
DEF_HELPER_1_2(neon_cge_u16, uint32_t, (uint32_t, uint32_t))
269
DEF_HELPER_1_2(neon_cge_s16, uint32_t, (uint32_t, uint32_t))
270
DEF_HELPER_1_2(neon_cge_u32, uint32_t, (uint32_t, uint32_t))
271
DEF_HELPER_1_2(neon_cge_s32, uint32_t, (uint32_t, uint32_t))
272

  
273
DEF_HELPER_1_2(neon_min_u8, uint32_t, (uint32_t, uint32_t))
274
DEF_HELPER_1_2(neon_min_s8, uint32_t, (uint32_t, uint32_t))
275
DEF_HELPER_1_2(neon_min_u16, uint32_t, (uint32_t, uint32_t))
276
DEF_HELPER_1_2(neon_min_s16, uint32_t, (uint32_t, uint32_t))
277
DEF_HELPER_1_2(neon_min_u32, uint32_t, (uint32_t, uint32_t))
278
DEF_HELPER_1_2(neon_min_s32, uint32_t, (uint32_t, uint32_t))
279
DEF_HELPER_1_2(neon_max_u8, uint32_t, (uint32_t, uint32_t))
280
DEF_HELPER_1_2(neon_max_s8, uint32_t, (uint32_t, uint32_t))
281
DEF_HELPER_1_2(neon_max_u16, uint32_t, (uint32_t, uint32_t))
282
DEF_HELPER_1_2(neon_max_s16, uint32_t, (uint32_t, uint32_t))
283
DEF_HELPER_1_2(neon_max_u32, uint32_t, (uint32_t, uint32_t))
284
DEF_HELPER_1_2(neon_max_s32, uint32_t, (uint32_t, uint32_t))
285
DEF_HELPER_1_2(neon_pmin_u8, uint32_t, (uint32_t, uint32_t))
286
DEF_HELPER_1_2(neon_pmin_s8, uint32_t, (uint32_t, uint32_t))
287
DEF_HELPER_1_2(neon_pmin_u16, uint32_t, (uint32_t, uint32_t))
288
DEF_HELPER_1_2(neon_pmin_s16, uint32_t, (uint32_t, uint32_t))
289
DEF_HELPER_1_2(neon_pmin_u32, uint32_t, (uint32_t, uint32_t))
290
DEF_HELPER_1_2(neon_pmin_s32, uint32_t, (uint32_t, uint32_t))
291
DEF_HELPER_1_2(neon_pmax_u8, uint32_t, (uint32_t, uint32_t))
292
DEF_HELPER_1_2(neon_pmax_s8, uint32_t, (uint32_t, uint32_t))
293
DEF_HELPER_1_2(neon_pmax_u16, uint32_t, (uint32_t, uint32_t))
294
DEF_HELPER_1_2(neon_pmax_s16, uint32_t, (uint32_t, uint32_t))
295
DEF_HELPER_1_2(neon_pmax_u32, uint32_t, (uint32_t, uint32_t))
296
DEF_HELPER_1_2(neon_pmax_s32, uint32_t, (uint32_t, uint32_t))
297

  
298
DEF_HELPER_1_2(neon_abd_u8, uint32_t, (uint32_t, uint32_t))
299
DEF_HELPER_1_2(neon_abd_s8, uint32_t, (uint32_t, uint32_t))
300
DEF_HELPER_1_2(neon_abd_u16, uint32_t, (uint32_t, uint32_t))
301
DEF_HELPER_1_2(neon_abd_s16, uint32_t, (uint32_t, uint32_t))
302
DEF_HELPER_1_2(neon_abd_u32, uint32_t, (uint32_t, uint32_t))
303
DEF_HELPER_1_2(neon_abd_s32, uint32_t, (uint32_t, uint32_t))
304

  
305
DEF_HELPER_1_2(neon_shl_u8, uint32_t, (uint32_t, uint32_t))
306
DEF_HELPER_1_2(neon_shl_s8, uint32_t, (uint32_t, uint32_t))
307
DEF_HELPER_1_2(neon_shl_u16, uint32_t, (uint32_t, uint32_t))
308
DEF_HELPER_1_2(neon_shl_s16, uint32_t, (uint32_t, uint32_t))
309
DEF_HELPER_1_2(neon_shl_u32, uint32_t, (uint32_t, uint32_t))
310
DEF_HELPER_1_2(neon_shl_s32, uint32_t, (uint32_t, uint32_t))
311
DEF_HELPER_1_2(neon_shl_u64, uint64_t, (uint64_t, uint64_t))
312
DEF_HELPER_1_2(neon_shl_s64, uint64_t, (uint64_t, uint64_t))
313
DEF_HELPER_1_2(neon_rshl_u8, uint32_t, (uint32_t, uint32_t))
314
DEF_HELPER_1_2(neon_rshl_s8, uint32_t, (uint32_t, uint32_t))
315
DEF_HELPER_1_2(neon_rshl_u16, uint32_t, (uint32_t, uint32_t))
316
DEF_HELPER_1_2(neon_rshl_s16, uint32_t, (uint32_t, uint32_t))
317
DEF_HELPER_1_2(neon_rshl_u32, uint32_t, (uint32_t, uint32_t))
318
DEF_HELPER_1_2(neon_rshl_s32, uint32_t, (uint32_t, uint32_t))
319
DEF_HELPER_1_2(neon_rshl_u64, uint64_t, (uint64_t, uint64_t))
320
DEF_HELPER_1_2(neon_rshl_s64, uint64_t, (uint64_t, uint64_t))
321
DEF_HELPER_1_3(neon_qshl_u8, uint32_t, (CPUState *, uint32_t, uint32_t))
322
DEF_HELPER_1_3(neon_qshl_s8, uint32_t, (CPUState *, uint32_t, uint32_t))
323
DEF_HELPER_1_3(neon_qshl_u16, uint32_t, (CPUState *, uint32_t, uint32_t))
324
DEF_HELPER_1_3(neon_qshl_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
325
DEF_HELPER_1_3(neon_qshl_u32, uint32_t, (CPUState *, uint32_t, uint32_t))
326
DEF_HELPER_1_3(neon_qshl_s32, uint32_t, (CPUState *, uint32_t, uint32_t))
327
DEF_HELPER_1_3(neon_qshl_u64, uint64_t, (CPUState *, uint64_t, uint64_t))
328
DEF_HELPER_1_3(neon_qshl_s64, uint64_t, (CPUState *, uint64_t, uint64_t))
329
DEF_HELPER_1_3(neon_qrshl_u8, uint32_t, (CPUState *, uint32_t, uint32_t))
330
DEF_HELPER_1_3(neon_qrshl_s8, uint32_t, (CPUState *, uint32_t, uint32_t))
331
DEF_HELPER_1_3(neon_qrshl_u16, uint32_t, (CPUState *, uint32_t, uint32_t))
332
DEF_HELPER_1_3(neon_qrshl_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
333
DEF_HELPER_1_3(neon_qrshl_u32, uint32_t, (CPUState *, uint32_t, uint32_t))
334
DEF_HELPER_1_3(neon_qrshl_s32, uint32_t, (CPUState *, uint32_t, uint32_t))
335
DEF_HELPER_1_3(neon_qrshl_u64, uint64_t, (CPUState *, uint64_t, uint64_t))
336
DEF_HELPER_1_3(neon_qrshl_s64, uint64_t, (CPUState *, uint64_t, uint64_t))
337

  
338
DEF_HELPER_1_2(neon_add_u8, uint32_t, (uint32_t, uint32_t))
339
DEF_HELPER_1_2(neon_add_u16, uint32_t, (uint32_t, uint32_t))
340
DEF_HELPER_1_2(neon_padd_u8, uint32_t, (uint32_t, uint32_t))
341
DEF_HELPER_1_2(neon_padd_u16, uint32_t, (uint32_t, uint32_t))
342
DEF_HELPER_1_2(neon_sub_u8, uint32_t, (uint32_t, uint32_t))
343
DEF_HELPER_1_2(neon_sub_u16, uint32_t, (uint32_t, uint32_t))
344
DEF_HELPER_1_2(neon_mul_u8, uint32_t, (uint32_t, uint32_t))
345
DEF_HELPER_1_2(neon_mul_u16, uint32_t, (uint32_t, uint32_t))
346
DEF_HELPER_1_2(neon_mul_p8, uint32_t, (uint32_t, uint32_t))
347

  
348
DEF_HELPER_1_2(neon_tst_u8, uint32_t, (uint32_t, uint32_t))
349
DEF_HELPER_1_2(neon_tst_u16, uint32_t, (uint32_t, uint32_t))
350
DEF_HELPER_1_2(neon_tst_u32, uint32_t, (uint32_t, uint32_t))
351
DEF_HELPER_1_2(neon_ceq_u8, uint32_t, (uint32_t, uint32_t))
352
DEF_HELPER_1_2(neon_ceq_u16, uint32_t, (uint32_t, uint32_t))
353
DEF_HELPER_1_2(neon_ceq_u32, uint32_t, (uint32_t, uint32_t))
354

  
355
DEF_HELPER_1_1(neon_abs_s8, uint32_t, (uint32_t))
356
DEF_HELPER_1_1(neon_abs_s16, uint32_t, (uint32_t))
357
DEF_HELPER_1_1(neon_clz_u8, uint32_t, (uint32_t))
358
DEF_HELPER_1_1(neon_clz_u16, uint32_t, (uint32_t))
359
DEF_HELPER_1_1(neon_cls_s8, uint32_t, (uint32_t))
360
DEF_HELPER_1_1(neon_cls_s16, uint32_t, (uint32_t))
361
DEF_HELPER_1_1(neon_cls_s32, uint32_t, (uint32_t))
362
DEF_HELPER_1_1(neon_cnt_u8, uint32_t, (uint32_t))
363

  
364
DEF_HELPER_1_3(neon_qdmulh_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
365
DEF_HELPER_1_3(neon_qrdmulh_s16, uint32_t, (CPUState *, uint32_t, uint32_t))
366
DEF_HELPER_1_3(neon_qdmulh_s32, uint32_t, (CPUState *, uint32_t, uint32_t))
367
DEF_HELPER_1_3(neon_qrdmulh_s32, uint32_t, (CPUState *, uint32_t, uint32_t))
368

  
369
DEF_HELPER_1_1(neon_narrow_u8, uint32_t, (uint64_t))
370
DEF_HELPER_1_1(neon_narrow_u16, uint32_t, (uint64_t))
371
DEF_HELPER_1_2(neon_narrow_sat_u8, uint32_t, (CPUState *, uint64_t))
372
DEF_HELPER_1_2(neon_narrow_sat_s8, uint32_t, (CPUState *, uint64_t))
373
DEF_HELPER_1_2(neon_narrow_sat_u16, uint32_t, (CPUState *, uint64_t))
374
DEF_HELPER_1_2(neon_narrow_sat_s16, uint32_t, (CPUState *, uint64_t))
375
DEF_HELPER_1_2(neon_narrow_sat_u32, uint32_t, (CPUState *, uint64_t))
376
DEF_HELPER_1_2(neon_narrow_sat_s32, uint32_t, (CPUState *, uint64_t))
377
DEF_HELPER_1_1(neon_narrow_high_u8, uint32_t, (uint64_t))
378
DEF_HELPER_1_1(neon_narrow_high_u16, uint32_t, (uint64_t))
379
DEF_HELPER_1_1(neon_narrow_round_high_u8, uint32_t, (uint64_t))
380
DEF_HELPER_1_1(neon_narrow_round_high_u16, uint32_t, (uint64_t))
381
DEF_HELPER_1_1(neon_widen_u8, uint64_t, (uint32_t))
382
DEF_HELPER_1_1(neon_widen_s8, uint64_t, (uint32_t))
383
DEF_HELPER_1_1(neon_widen_u16, uint64_t, (uint32_t))
384
DEF_HELPER_1_1(neon_widen_s16, uint64_t, (uint32_t))
385

  
386
DEF_HELPER_1_2(neon_addl_u16, uint64_t, (uint64_t, uint64_t))
387
DEF_HELPER_1_2(neon_addl_u32, uint64_t, (uint64_t, uint64_t))
388
DEF_HELPER_1_2(neon_paddl_u16, uint64_t, (uint64_t, uint64_t))
389
DEF_HELPER_1_2(neon_paddl_u32, uint64_t, (uint64_t, uint64_t))
390
DEF_HELPER_1_2(neon_subl_u16, uint64_t, (uint64_t, uint64_t))
391
DEF_HELPER_1_2(neon_subl_u32, uint64_t, (uint64_t, uint64_t))
392
DEF_HELPER_1_3(neon_addl_saturate_s32, uint64_t, (CPUState *, uint64_t, uint64_t))
393
DEF_HELPER_1_3(neon_addl_saturate_s64, uint64_t, (CPUState *, uint64_t, uint64_t))
394
DEF_HELPER_1_2(neon_abdl_u16, uint64_t, (uint32_t, uint32_t))
395
DEF_HELPER_1_2(neon_abdl_s16, uint64_t, (uint32_t, uint32_t))
396
DEF_HELPER_1_2(neon_abdl_u32, uint64_t, (uint32_t, uint32_t))
397
DEF_HELPER_1_2(neon_abdl_s32, uint64_t, (uint32_t, uint32_t))
398
DEF_HELPER_1_2(neon_abdl_u64, uint64_t, (uint32_t, uint32_t))
399
DEF_HELPER_1_2(neon_abdl_s64, uint64_t, (uint32_t, uint32_t))
400
DEF_HELPER_1_2(neon_mull_u8, uint64_t, (uint32_t, uint32_t))
401
DEF_HELPER_1_2(neon_mull_s8, uint64_t, (uint32_t, uint32_t))
402
DEF_HELPER_1_2(neon_mull_u16, uint64_t, (uint32_t, uint32_t))
403
DEF_HELPER_1_2(neon_mull_s16, uint64_t, (uint32_t, uint32_t))
404

  
405
DEF_HELPER_1_1(neon_negl_u16, uint64_t, (uint64_t))
406
DEF_HELPER_1_1(neon_negl_u32, uint64_t, (uint64_t))
407
DEF_HELPER_1_1(neon_negl_u64, uint64_t, (uint64_t))
408

  
409
DEF_HELPER_1_2(neon_qabs_s8, uint32_t, (CPUState *, uint32_t))
410
DEF_HELPER_1_2(neon_qabs_s16, uint32_t, (CPUState *, uint32_t))
411
DEF_HELPER_1_2(neon_qabs_s32, uint32_t, (CPUState *, uint32_t))
412
DEF_HELPER_1_2(neon_qneg_s8, uint32_t, (CPUState *, uint32_t))
413
DEF_HELPER_1_2(neon_qneg_s16, uint32_t, (CPUState *, uint32_t))
414
DEF_HELPER_1_2(neon_qneg_s32, uint32_t, (CPUState *, uint32_t))
415

  
416
DEF_HELPER_0_0(neon_trn_u8, void, (void))
417
DEF_HELPER_0_0(neon_trn_u16, void, (void))
418
DEF_HELPER_0_0(neon_unzip_u8, void, (void))
419
DEF_HELPER_0_0(neon_zip_u8, void, (void))
420
DEF_HELPER_0_0(neon_zip_u16, void, (void))
421

  
422
DEF_HELPER_1_2(neon_min_f32, uint32_t, (uint32_t, uint32_t))
423
DEF_HELPER_1_2(neon_max_f32, uint32_t, (uint32_t, uint32_t))
424
DEF_HELPER_1_2(neon_abd_f32, uint32_t, (uint32_t, uint32_t))
425
DEF_HELPER_1_2(neon_add_f32, uint32_t, (uint32_t, uint32_t))
426
DEF_HELPER_1_2(neon_sub_f32, uint32_t, (uint32_t, uint32_t))
427
DEF_HELPER_1_2(neon_mul_f32, uint32_t, (uint32_t, uint32_t))
428
DEF_HELPER_1_2(neon_ceq_f32, uint32_t, (uint32_t, uint32_t))
429
DEF_HELPER_1_2(neon_cge_f32, uint32_t, (uint32_t, uint32_t))
430
DEF_HELPER_1_2(neon_cgt_f32, uint32_t, (uint32_t, uint32_t))
431
DEF_HELPER_1_2(neon_acge_f32, uint32_t, (uint32_t, uint32_t))
432
DEF_HELPER_1_2(neon_acgt_f32, uint32_t, (uint32_t, uint32_t))
433

  
226 434
#undef DEF_HELPER
227 435
#undef DEF_HELPER_0_0
228 436
#undef DEF_HELPER_0_1
b/target-arm/neon_helper.c
1
#include <stdlib.h>
2
#include <stdio.h>
3

  
4
#include "cpu.h"
5
#include "exec-all.h"
6
#include "helpers.h"
7

  
8
#define SIGNBIT (uint32_t)0x80000000
9
#define SIGNBIT64 ((uint64_t)1 << 63)
10

  
11
/* Record saturation by setting the Q bit in the FPSCR image.  The bit is
   ORed in so that the other FPSCR fields already stored there are
   preserved.  Expects a variable named env to be in scope at the point
   of use.  */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
12

  
13
static float_status neon_float_status;
14
#define NFS &neon_float_status
15

  
16
/* Helper routines to perform bitwise copies between float and int.  */
17
static inline float32 vfp_itos(uint32_t i)
18
{
19
    union {
20
        uint32_t i;
21
        float32 s;
22
    } v;
23

  
24
    v.i = i;
25
    return v.s;
26
}
27

  
28
static inline uint32_t vfp_stoi(float32 s)
29
{
30
    union {
31
        uint32_t i;
32
        float32 s;
33
    } v;
34

  
35
    v.s = s;
36
    return v.i;
37
}
38

  
39
#define NEON_TYPE1(name, type) \
40
typedef struct \
41
{ \
42
    type v1; \
43
} neon_##name;
44
#ifdef WORDS_BIGENDIAN
45
#define NEON_TYPE2(name, type) \
46
typedef struct \
47
{ \
48
    type v2; \
49
    type v1; \
50
} neon_##name;
51
#define NEON_TYPE4(name, type) \
52
typedef struct \
53
{ \
54
    type v4; \
55
    type v3; \
56
    type v2; \
57
    type v1; \
58
} neon_##name;
59
#else
60
#define NEON_TYPE2(name, type) \
61
typedef struct \
62
{ \
63
    type v1; \
64
    type v2; \
65
} neon_##name;
66
#define NEON_TYPE4(name, type) \
67
typedef struct \
68
{ \
69
    type v1; \
70
    type v2; \
71
    type v3; \
72
    type v4; \
73
} neon_##name;
74
#endif
75

  
76
NEON_TYPE4(s8, int8_t)
77
NEON_TYPE4(u8, uint8_t)
78
NEON_TYPE2(s16, int16_t)
79
NEON_TYPE2(u16, uint16_t)
80
NEON_TYPE1(s32, int32_t)
81
NEON_TYPE1(u32, uint32_t)
82
#undef NEON_TYPE4
83
#undef NEON_TYPE2
84
#undef NEON_TYPE1
85

  
86
/* Copy from a uint32_t to a vector structure type.  */
87
#define NEON_UNPACK(vtype, dest, val) do { \
88
    union { \
89
        vtype v; \
90
        uint32_t i; \
91
    } conv_u; \
92
    conv_u.i = (val); \
93
    dest = conv_u.v; \
94
    } while(0)
95

  
96
/* Copy from a vector structure type to a uint32_t.  */
97
#define NEON_PACK(vtype, dest, val) do { \
98
    union { \
99
        vtype v; \
100
        uint32_t i; \
101
    } conv_u; \
102
    conv_u.v = (val); \
103
    dest = conv_u.i; \
104
    } while(0)
105

  
106
#define NEON_DO1 \
107
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1);
108
#define NEON_DO2 \
109
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
110
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2);
111
#define NEON_DO4 \
112
    NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \
113
    NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \
114
    NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \
115
    NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4);
116

  
117
#define NEON_VOP_BODY(vtype, n) \
118
{ \
119
    uint32_t res; \
120
    vtype vsrc1; \
121
    vtype vsrc2; \
122
    vtype vdest; \
123
    NEON_UNPACK(vtype, vsrc1, arg1); \
124
    NEON_UNPACK(vtype, vsrc2, arg2); \
125
    NEON_DO##n; \
126
    NEON_PACK(vtype, res, vdest); \
127
    return res; \
128
}
129

  
130
#define NEON_VOP(name, vtype, n) \
131
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
132
NEON_VOP_BODY(vtype, n)
133

  
134
#define NEON_VOP_ENV(name, vtype, n) \
135
uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
136
NEON_VOP_BODY(vtype, n)
137

  
138
/* Pairwise operations.  */
139
/* For 32-bit elements each segment only contains a single element, so
140
   the elementwise and pairwise operations are the same.  */
141
#define NEON_PDO2 \
142
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
143
    NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2);
144
#define NEON_PDO4 \
145
    NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \
146
    NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \
147
    NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \
148
    NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \
149

  
150
#define NEON_POP(name, vtype, n) \
151
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
152
{ \
153
    uint32_t res; \
154
    vtype vsrc1; \
155
    vtype vsrc2; \
156
    vtype vdest; \
157
    NEON_UNPACK(vtype, vsrc1, arg1); \
158
    NEON_UNPACK(vtype, vsrc2, arg2); \
159
    NEON_PDO##n; \
160
    NEON_PACK(vtype, res, vdest); \
161
    return res; \
162
}
163

  
164
/* Unary operators.  */
165
#define NEON_VOP1(name, vtype, n) \
166
uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
167
{ \
168
    vtype vsrc1; \
169
    vtype vdest; \
170
    NEON_UNPACK(vtype, vsrc1, arg); \
171
    NEON_DO##n; \
172
    NEON_PACK(vtype, arg, vdest); \
173
    return arg; \
174
}
175

  
176

  
177
#define NEON_USAT(dest, src1, src2, type) do { \
178
    uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
179
    if (tmp != (type)tmp) { \
180
        SET_QC(); \
181
        dest = ~0; \
182
    } else { \
183
        dest = tmp; \
184
    }} while(0)
185
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
186
NEON_VOP_ENV(qadd_u8, neon_u8, 4)
187
#undef NEON_FN
188
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
189
NEON_VOP_ENV(qadd_u16, neon_u16, 2)
190
#undef NEON_FN
191
#undef NEON_USAT
192

  
193
#define NEON_SSAT(dest, src1, src2, type) do { \
194
    int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
195
    if (tmp != (type)tmp) { \
196
        SET_QC(); \
197
        if (src2 > 0) { \
198
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
199
        } else { \
200
            tmp = 1 << (sizeof(type) * 8 - 1); \
201
        } \
202
    } \
203
    dest = tmp; \
204
    } while(0)
205
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
206
NEON_VOP_ENV(qadd_s8, neon_s8, 4)
207
#undef NEON_FN
208
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
209
NEON_VOP_ENV(qadd_s16, neon_s16, 2)
210
#undef NEON_FN
211
#undef NEON_SSAT
212

  
213
#define NEON_USAT(dest, src1, src2, type) do { \
214
    uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
215
    if (tmp != (type)tmp) { \
216
        SET_QC(); \
217
        dest = 0; \
218
    } else { \
219
        dest = tmp; \
220
    }} while(0)
221
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
222
NEON_VOP_ENV(qsub_u8, neon_u8, 4)
223
#undef NEON_FN
224
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
225
NEON_VOP_ENV(qsub_u16, neon_u16, 2)
226
#undef NEON_FN
227
#undef NEON_USAT
228

  
229
#define NEON_SSAT(dest, src1, src2, type) do { \
230
    int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
231
    if (tmp != (type)tmp) { \
232
        SET_QC(); \
233
        if (src2 < 0) { \
234
            tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
235
        } else { \
236
            tmp = 1 << (sizeof(type) * 8 - 1); \
237
        } \
238
    } \
239
    dest = tmp; \
240
    } while(0)
241
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
242
NEON_VOP_ENV(qsub_s8, neon_s8, 4)
243
#undef NEON_FN
244
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
245
NEON_VOP_ENV(qsub_s16, neon_s16, 2)
246
#undef NEON_FN
247
#undef NEON_SSAT
248

  
249
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
250
NEON_VOP(hadd_s8, neon_s8, 4)
251
NEON_VOP(hadd_u8, neon_u8, 4)
252
NEON_VOP(hadd_s16, neon_s16, 2)
253
NEON_VOP(hadd_u16, neon_u16, 2)
254
#undef NEON_FN
255

  
256
int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2)
257
{
258
    int32_t dest;
259

  
260
    dest = (src1 >> 1) + (src2 >> 1);
261
    if (src1 & src2 & 1)
262
        dest++;
263
    return dest;
264
}
265

  
266
uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2)
267
{
268
    uint32_t dest;
269

  
270
    dest = (src1 >> 1) + (src2 >> 1);
271
    if (src1 & src2 & 1)
272
        dest++;
273
    return dest;
274
}
275

  
276
#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1
277
NEON_VOP(rhadd_s8, neon_s8, 4)
278
NEON_VOP(rhadd_u8, neon_u8, 4)
279
NEON_VOP(rhadd_s16, neon_s16, 2)
280
NEON_VOP(rhadd_u16, neon_u16, 2)
281
#undef NEON_FN
282

  
283
int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2)
284
{
285
    int32_t dest;
286

  
287
    dest = (src1 >> 1) + (src2 >> 1);
288
    if ((src1 | src2) & 1)
289
        dest++;
290
    return dest;
291
}
292

  
293
uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2)
294
{
295
    uint32_t dest;
296

  
297
    dest = (src1 >> 1) + (src2 >> 1);
298
    if ((src1 | src2) & 1)
299
        dest++;
300
    return dest;
301
}
302

  
303
#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1
304
NEON_VOP(hsub_s8, neon_s8, 4)
305
NEON_VOP(hsub_u8, neon_u8, 4)
306
NEON_VOP(hsub_s16, neon_s16, 2)
307
NEON_VOP(hsub_u16, neon_u16, 2)
308
#undef NEON_FN
309

  
310
int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2)
311
{
312
    int32_t dest;
313

  
314
    dest = (src1 >> 1) - (src2 >> 1);
315
    if ((~src1) & src2 & 1)
316
        dest--;
317
    return dest;
318
}
319

  
320
uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
321
{
322
    uint32_t dest;
323

  
324
    dest = (src1 >> 1) - (src2 >> 1);
325
    if ((~src1) & src2 & 1)
326
        dest--;
327
    return dest;
328
}
329

  
330
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
331
NEON_VOP(cgt_s8, neon_s8, 4)
332
NEON_VOP(cgt_u8, neon_u8, 4)
333
NEON_VOP(cgt_s16, neon_s16, 2)
334
NEON_VOP(cgt_u16, neon_u16, 2)
335
NEON_VOP(cgt_s32, neon_s32, 1)
336
NEON_VOP(cgt_u32, neon_u32, 1)
337
#undef NEON_FN
338

  
339
#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
340
NEON_VOP(cge_s8, neon_s8, 4)
341
NEON_VOP(cge_u8, neon_u8, 4)
342
NEON_VOP(cge_s16, neon_s16, 2)
343
NEON_VOP(cge_u16, neon_u16, 2)
344
NEON_VOP(cge_s32, neon_s32, 1)
345
NEON_VOP(cge_u32, neon_u32, 1)
346
#undef NEON_FN
347

  
348
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
349
NEON_VOP(min_s8, neon_s8, 4)
350
NEON_VOP(min_u8, neon_u8, 4)
351
NEON_VOP(min_s16, neon_s16, 2)
352
NEON_VOP(min_u16, neon_u16, 2)
353
NEON_VOP(min_s32, neon_s32, 1)
354
NEON_VOP(min_u32, neon_u32, 1)
355
NEON_POP(pmin_s8, neon_s8, 4)
356
NEON_POP(pmin_u8, neon_u8, 4)
357
NEON_POP(pmin_s16, neon_s16, 2)
358
NEON_POP(pmin_u16, neon_u16, 2)
359
#undef NEON_FN
360

  
361
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
362
NEON_VOP(max_s8, neon_s8, 4)
363
NEON_VOP(max_u8, neon_u8, 4)
364
NEON_VOP(max_s16, neon_s16, 2)
365
NEON_VOP(max_u16, neon_u16, 2)
366
NEON_VOP(max_s32, neon_s32, 1)
367
NEON_VOP(max_u32, neon_u32, 1)
368
NEON_POP(pmax_s8, neon_s8, 4)
369
NEON_POP(pmax_u8, neon_u8, 4)
370
NEON_POP(pmax_s16, neon_s16, 2)
371
NEON_POP(pmax_u16, neon_u16, 2)
372
#undef NEON_FN
373

  
374
#define NEON_FN(dest, src1, src2) \
375
    dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
376
NEON_VOP(abd_s8, neon_s8, 4)
377
NEON_VOP(abd_u8, neon_u8, 4)
378
NEON_VOP(abd_s16, neon_s16, 2)
379
NEON_VOP(abd_u16, neon_u16, 2)
380
NEON_VOP(abd_s32, neon_s32, 1)
381
NEON_VOP(abd_u32, neon_u32, 1)
382
#undef NEON_FN
383

  
384
#define NEON_FN(dest, src1, src2) do { \
385
    int8_t tmp; \
386
    tmp = (int8_t)src2; \
387
    if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \
388
        dest = 0; \
389
    } else if (tmp < 0) { \
390
        dest = src1 >> -tmp; \
391
    } else { \
392
        dest = src1 << tmp; \
393
    }} while (0)
394
NEON_VOP(shl_u8, neon_u8, 4)
395
NEON_VOP(shl_u16, neon_u16, 2)
396
NEON_VOP(shl_u32, neon_u32, 1)
397
#undef NEON_FN
398

  
399
uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
400
{
401
    int8_t shift = (int8_t)shiftop;
402
    if (shift >= 64 || shift <= -64) {
403
        val = 0;
404
    } else if (shift < 0) {
405
        val >>= -shift;
406
    } else {
407
        val <<= shift;
408
    }
409
    return val;
410
}
411

  
412
#define NEON_FN(dest, src1, src2) do { \
413
    int8_t tmp; \
414
    tmp = (int8_t)src2; \
415
    if (tmp >= sizeof(src1) * 8) { \
416
        dest = 0; \
417
    } else if (tmp <= -sizeof(src1) * 8) { \
418
        dest = src1 >> (sizeof(src1) * 8 - 1); \
419
    } else if (tmp < 0) { \
420
        dest = src1 >> -tmp; \
421
    } else { \
422
        dest = src1 << tmp; \
423
    }} while (0)
424
NEON_VOP(shl_s8, neon_s8, 4)
425
NEON_VOP(shl_s16, neon_s16, 2)
426
NEON_VOP(shl_s32, neon_s32, 1)
427
#undef NEON_FN
428

  
429
uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
430
{
431
    int8_t shift = (int8_t)shiftop;
432
    int64_t val = valop;
433
    if (shift >= 64) {
434
        val = 0;
435
    } else if (shift <= -64) {
436
        val >>= 63;
437
    } else if (shift < 0) {
438
        val >>= -shift;
439
    } else {
440
        val <<= shift;
441
    }
442
    return val;
443
}
444

  
445
#define NEON_FN(dest, src1, src2) do { \
446
    int8_t tmp; \
447
    tmp = (int8_t)src2; \
448
    if (tmp >= sizeof(src1) * 8) { \
449
        dest = 0; \
450
    } else if (tmp < -sizeof(src1) * 8) { \
451
        dest >>= sizeof(src1) * 8 - 1; \
452
    } else if (tmp == -sizeof(src1) * 8) { \
453
        dest = src1 >> (tmp - 1); \
454
        dest++; \
455
        src2 >>= 1; \
456
    } else if (tmp < 0) { \
457
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
458
    } else { \
459
        dest = src1 << tmp; \
460
    }} while (0)
461
NEON_VOP(rshl_s8, neon_s8, 4)
462
NEON_VOP(rshl_s16, neon_s16, 2)
463
NEON_VOP(rshl_s32, neon_s32, 1)
464
#undef NEON_FN
465

  
466
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
467
{
468
    int8_t shift = (int8_t)shiftop;
469
    int64_t val = valop;
470
    if (shift >= 64) {
471
        val = 0;
472
    } else if (shift < -64) {
473
        val >>= 63;
474
    } else if (shift == -63) {
475
        val >>= 63;
476
        val++;
477
        val >>= 1;
478
    } else if (shift < 0) {
479
        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
480
    } else {
481
        val <<= shift;
482
    }
483
    return val;
484
}
485

  
486
#define NEON_FN(dest, src1, src2) do { \
487
    int8_t tmp; \
488
    tmp = (int8_t)src2; \
489
    if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \
490
        dest = 0; \
491
    } else if (tmp == -sizeof(src1) * 8) { \
492
        dest = src1 >> (tmp - 1); \
493
    } else if (tmp < 0) { \
494
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
495
    } else { \
496
        dest = src1 << tmp; \
497
    }} while (0)
498
NEON_VOP(rshl_u8, neon_u8, 4)
499
NEON_VOP(rshl_u16, neon_u16, 2)
500
NEON_VOP(rshl_u32, neon_u32, 1)
501
#undef NEON_FN
502

  
503
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
504
{
505
    int8_t shift = (uint8_t)shiftop;
506
    if (shift >= 64 || shift < 64) {
507
        val = 0;
508
    } else if (shift == -64) {
509
        /* Rounding a 1-bit result just preserves that bit.  */
510
        val >>= 63;
511
    } if (shift < 0) {
512
        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
513
        val >>= -shift;
514
    } else {
515
        val <<= shift;
516
    }
517
    return val;
518
}
519

  
520
#define NEON_FN(dest, src1, src2) do { \
521
    int8_t tmp; \
522
    tmp = (int8_t)src2; \
523
    if (tmp >= sizeof(src1) * 8) { \
524
        if (src1) { \
525
            SET_QC(); \
526
            dest = ~0; \
527
        } else { \
528
            dest = 0; \
529
        } \
530
    } else if (tmp <= -sizeof(src1) * 8) { \
531
        dest = 0; \
532
    } else if (tmp < 0) { \
533
        dest = src1 >> -tmp; \
534
    } else { \
535
        dest = src1 << tmp; \
536
        if ((dest >> tmp) != src1) { \
537
            SET_QC(); \
538
            dest = ~0; \
539
        } \
540
    }} while (0)
541
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
542
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
543
NEON_VOP_ENV(qshl_u32, neon_u32, 1)
544
#undef NEON_FN
545

  
546
/* Unsigned 64-bit saturating shift by the signed count held in the low
   byte of shiftop.  A left shift that would lose set bits returns all
   ones and calls SET_QC(); right shifts of 64 or more return 0.  */
uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    const int8_t count = (int8_t)shiftop;
    uint64_t shifted;

    if (count >= 64) {
        /* Every bit is shifted out: only zero survives unsaturated.  */
        if (val == 0) {
            return 0;
        }
        SET_QC();
        return ~(uint64_t)0;
    }
    if (count <= -64) {
        return 0;
    }
    if (count < 0) {
        return val >> -count;
    }

    shifted = val << count;
    if ((shifted >> count) != val) {
        /* Shifting back did not restore the input, so bits were lost.  */
        SET_QC();
        return ~(uint64_t)0;
    }
    return shifted;
}
570

  
571
#define NEON_FN(dest, src1, src2) do { \
572
    int8_t tmp; \
573
    tmp = (int8_t)src2; \
574
    if (tmp >= sizeof(src1) * 8) { \
575
        if (src1) \
576
            SET_QC(); \
577
        dest = src1 >> 31; \
578
    } else if (tmp <= -sizeof(src1) * 8) { \
579
        dest = src1 >> 31; \
580
    } else if (tmp < 0) { \
581
        dest = src1 >> -tmp; \
582
    } else { \
583
        dest = src1 << tmp; \
584
        if ((dest >> tmp) != src1) { \
585
            SET_QC(); \
586
            dest = src2 >> 31; \
587
        } \
588
    }} while (0)
589
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
590
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
591
NEON_VOP_ENV(qshl_s32, neon_s32, 1)
592
#undef NEON_FN
593

  
594
/* Signed saturating shift of a 64-bit value.  The count is the signed low
   byte of shiftop.  Counts of 64 or more saturate any non-zero value,
   counts of -64 or less leave only the sign, other negative counts shift
   right arithmetically, and the remaining counts shift left with an
   overflow check.  On saturation SET_QC() is invoked and the result is
   the largest positive or most negative 64-bit value, matching the sign
   of the input.  */
uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    int8_t shift = (uint8_t)shiftop;
    int64_t val = valop;
    if (shift >= 64) {
        if (val) {
            SET_QC();
            /* 0 or -1 XORed with 0x7fff... gives INT64_MAX or INT64_MIN.  */
            val = (val >> 63) ^ ~SIGNBIT64;
        }
    } else if (shift <= -64) {
        val >>= 63;
    } else if (shift < 0) {
        val >>= -shift;
    } else {
        int64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = (tmp >> 63) ^ ~SIGNBIT64;
        }
    }
    return val;
}
617

  
618

  
619
/* FIXME: This is wrong.  */
620
#define NEON_FN(dest, src1, src2) do { \
621
    int8_t tmp; \
622
    tmp = (int8_t)src2; \
623
    if (tmp < 0) { \
624
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
625
    } else { \
626
        dest = src1 << tmp; \
627
        if ((dest >> tmp) != src1) { \
628
            SET_QC(); \
629
            dest = ~0; \
630
        } \
631
    }} while (0)
632
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
633
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
634
NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
635
#undef NEON_FN
636

  
637
/* Unsigned saturating rounding shift of a 64-bit value.  The count is the
   signed low byte of shiftop.  Right shifts round to nearest by shifting
   one bit less than requested and then folding the last bit in, which
   avoids both the int-width rounding constant and the overflow of
   "val + round".  A left shift that loses set bits invokes SET_QC() and
   yields all-ones.  */
uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
{
    int8_t shift = (int8_t)shiftop;
    if (shift >= 64) {
        if (val) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    } else if (shift < -64) {
        val = 0;
    } else if (shift == -64) {
        /* (val + 2^63) >> 64 is just the top bit of val.  */
        val >>= 63;
    } else if (shift < 0) {
        val >>= (-shift - 1);
        /* Equivalent to (val + 1) >> 1 without the possible wrap.  */
        val = (val >> 1) + (val & 1);
    } else {
        uint64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = ~(uint64_t)0;
        }
    }
    return val;
}
652

  
653
#define NEON_FN(dest, src1, src2) do { \
654
    int8_t tmp; \
655
    tmp = (int8_t)src2; \
656
    if (tmp < 0) { \
657
        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
658
    } else { \
659
        dest = src1 << tmp; \
660
        if ((dest >> tmp) != src1) { \
661
            SET_QC(); \
662
            dest = src1 >> 31; \
663
        } \
664
    }} while (0)
665
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
666
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
667
NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
668
#undef NEON_FN
669

  
670
/* Signed saturating rounding shift of a 64-bit value.  The count is the
   signed low byte of shiftop.  Right shifts round to nearest by shifting
   one bit less than requested and folding the last bit in, so neither
   the rounding constant nor the addition can overflow.  On overflow of a
   left shift SET_QC() is invoked and the result is the largest positive
   or most negative 64-bit value, matching the sign of the input.  */
uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
{
    int8_t shift = (uint8_t)shiftop;
    int64_t val = valop;

    if (shift >= 64) {
        if (val) {
            SET_QC();
            val = (val >> 63) ^ ~SIGNBIT64;
        }
    } else if (shift <= -64) {
        /* (val + 2^63) >> 64 is zero for every signed 64-bit val.  */
        val = 0;
    } else if (shift < 0) {
        val >>= (-shift - 1);
        /* Equivalent to (val + 1) >> 1 without the possible overflow.  */
        val = (val >> 1) + (val & 1);
    } else {
        int64_t tmp = val;
        val <<= shift;
        if ((val >> shift) != tmp) {
            SET_QC();
            val = (tmp >> 63) ^ ~SIGNBIT64;
        }
    }
    return val;
}
687

  
688
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
689
{
690
    uint32_t mask;
691
    mask = (a ^ b) & 0x80808080u;
692
    a &= ~0x80808080u;
693
    b &= ~0x80808080u;
694
    return (a + b) ^ mask;
695
}
696

  
697
uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b)
698
{
699
    uint32_t mask;
700
    mask = (a ^ b) & 0x80008000u;
701
    a &= ~0x80008000u;
702
    b &= ~0x80008000u;
703
    return (a + b) ^ mask;
704
}
705

  
706
#define NEON_FN(dest, src1, src2) dest = src1 + src2
707
NEON_POP(padd_u8, neon_u8, 4)
708
NEON_POP(padd_u16, neon_u16, 2)
709
#undef NEON_FN
710

  
711
#define NEON_FN(dest, src1, src2) dest = src1 - src2
712
NEON_VOP(sub_u8, neon_u8, 4)
713
NEON_VOP(sub_u16, neon_u16, 2)
714
#undef NEON_FN
715

  
716
#define NEON_FN(dest, src1, src2) dest = src1 * src2
717
NEON_VOP(mul_u8, neon_u8, 4)
718
NEON_VOP(mul_u16, neon_u16, 2)
719
#undef NEON_FN
720

  
721
/* Polynomial multiplication is like integer multiplcation except the
722
   partial products are XORed, not added.  */
723
uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
724
{
725
    uint32_t mask;
726
    uint32_t result;
727
    result = 0;
728
    while (op1) {
729
        mask = 0;
730
        if (op1 & 1)
731
            mask |= 0xff;
732
        if (op1 & (1 << 8))
733
            mask |= (0xff << 8);
734
        if (op1 & (1 << 16))
735
            mask |= (0xff << 16);
736
        if (op1 & (1 << 24))
737
            mask |= (0xff << 24);
738
        result ^= op2 & mask;
739
        op1 = (op1 >> 1) & 0x7f7f7f7f;
740
        op2 = (op2 << 1) & 0xfefefefe;
741
    }
742
    return result;
743
}
744

  
745
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
746
NEON_VOP(tst_u8, neon_u8, 4)
747
NEON_VOP(tst_u16, neon_u16, 2)
748
NEON_VOP(tst_u32, neon_u32, 1)
749
#undef NEON_FN
750

  
751
#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
752
NEON_VOP(ceq_u8, neon_u8, 4)
753
NEON_VOP(ceq_u16, neon_u16, 2)
754
NEON_VOP(ceq_u32, neon_u32, 1)
755
#undef NEON_FN
756

  
757
#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
758
NEON_VOP1(abs_s8, neon_s8, 4)
759
NEON_VOP1(abs_s16, neon_s16, 2)
760
#undef NEON_FN
761

  
762
/* Count Leading Sign/Zero Bits.  */

/* Return the number of leading zero bits in the 8-bit value x
   (8 when x is zero).  */
static inline int do_clz8(uint8_t x)
{
    int zeros = 8;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
770

  
771
/* Return the number of leading zero bits in the 16-bit value x
   (16 when x is zero).  */
static inline int do_clz16(uint16_t x)
{
    int zeros = 16;

    while (x != 0) {
        x >>= 1;
        zeros--;
    }
    return zeros;
}
778

  
779
#define NEON_FN(dest, src, dummy) dest = do_clz8(src)
780
NEON_VOP1(clz_u8, neon_u8, 4)
781
#undef NEON_FN
782

  
783
#define NEON_FN(dest, src, dummy) dest = do_clz16(src)
784
NEON_VOP1(clz_u16, neon_u16, 2)
785
#undef NEON_FN
786

  
787
#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1
788
NEON_VOP1(cls_s8, neon_s8, 4)
789
#undef NEON_FN
790

  
791
#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1
792
NEON_VOP1(cls_s16, neon_s16, 2)
793
#undef NEON_FN
794

  
795
uint32_t HELPER(neon_cls_s32)(uint32_t x)
796
{
797
    int count;
798
    if ((int32_t)x < 0)
799
        x = ~x;
800
    for (count = 32; x; count--)
801
        x = x >> 1;
802
    return count - 1;
803
}
804

  
805
/* Bit count.  */
806
uint32_t HELPER(neon_cnt_u8)(uint32_t x)
807
{
808
    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
809
    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
810
    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
811
    return x;
812
}
813

  
814
#define NEON_QDMULH16(dest, src1, src2, round) do { \
815
    uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
816
    if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
817
        SET_QC(); \
818
        tmp = (tmp >> 31) ^ ~SIGNBIT; \
819
    } \
820
    tmp <<= 1; \
821
    if (round) { \
822
        int32_t old = tmp; \
823
        tmp += 1 << 15; \
824
        if ((int32_t)tmp < old) { \
825
            SET_QC(); \
826
            tmp = SIGNBIT - 1; \
827
        } \
828
    } \
829
    dest = tmp >> 16; \
830
    } while(0)
831
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
832
NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
833
#undef NEON_FN
834
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
835
NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
836
#undef NEON_FN
837
#undef NEON_QDMULH16
838

  
839
#define NEON_QDMULH32(dest, src1, src2, round) do { \
840
    uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \
841
    if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \
842
        SET_QC(); \
843
        tmp = (tmp >> 63) ^ ~SIGNBIT64; \
844
    } else { \
845
        tmp <<= 1; \
846
    } \
847
    if (round) { \
848
        int64_t old = tmp; \
849
        tmp += (int64_t)1 << 31; \
850
        if ((int64_t)tmp < old) { \
851
            SET_QC(); \
852
            tmp = SIGNBIT64 - 1; \
853
        } \
854
    } \
855
    dest = tmp >> 32; \
856
    } while(0)
857
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
858
NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
859
#undef NEON_FN
860
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
861
NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
862
#undef NEON_FN
863
#undef NEON_QDMULH32
864

  
865
uint32_t HELPER(neon_narrow_u8)(uint64_t x)
866
{
867
    return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
868
           | ((x >> 24) & 0xff000000u);
869
}
870

  
871
uint32_t HELPER(neon_narrow_u16)(uint64_t x)
872
{
873
    return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
874
}
875

  
876
uint32_t HELPER(neon_narrow_high_u8)(uint64_t x)
877
{
878
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
879
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
880
}
881

  
882
uint32_t HELPER(neon_narrow_high_u16)(uint64_t x)
883
{
884
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
885
}
886

  
887
uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x)
888
{
889
    x &= 0xff80ff80ff80ff80ull;
890
    x += 0x0080008000800080ull;
891
    return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00)
892
            | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000);
893
}
894

  
895
uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
896
{
897
    x &= 0xffff8000ffff8000ull;
898
    x += 0x0000800000008000ull;
899
    return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
900
}
901

  
902
/* Narrow four unsigned 16-bit lanes of x to bytes packed in a 32-bit
   word.  A lane above 0xff becomes 0xff and SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
{
    uint32_t res = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint16_t s = x >> (16 * lane);
        uint8_t d;

        if (s > 0xff) {
            d = 0xff;
            SET_QC();
        } else {
            d = s;
        }
        res |= (uint32_t)d << (8 * lane);
    }
    return res;
}
924

  
925
/* Narrow four signed 16-bit lanes of x to bytes packed in a 32-bit word.
   A lane that does not fit in int8_t becomes 0x7f (positive) or 0x80
   (negative) and SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
{
    uint32_t res = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int16_t s = x >> (16 * lane);
        uint8_t d;

        if (s != (int8_t)s) {
            /* s >> 15 is 0 or -1, selecting 0x7f or its complement.  */
            d = (s >> 15) ^ 0x7f;
            SET_QC();
        } else {
            d = s;
        }
        res |= (uint32_t)d << (8 * lane);
    }
    return res;
}
947

  
948
/* Narrow two unsigned 32-bit lanes of x to halfwords packed in a 32-bit
   word.  A lane above 0xffff becomes 0xffff and SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
{
    uint32_t lo_lane = x;
    uint32_t hi_lane;

    if (lo_lane > 0xffff) {
        lo_lane = 0xffff;
        SET_QC();
    }

    hi_lane = x >> 32;
    if (hi_lane > 0xffff) {
        hi_lane = 0xffff;
        SET_QC();
    }

    return (hi_lane << 16) | lo_lane;
}
964

  
965
/* Narrow two signed 32-bit lanes of x to halfwords packed in a 32-bit
   word.  A lane that does not fit in int16_t becomes 0x7fff (positive)
   or 0x8000 (negative) and SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
{
    int32_t low;
    int32_t high;
    low = x;
    if (low != (int16_t)low) {
        low = (low >> 31) ^ 0x7fff;
        SET_QC();
    }
    high = x >> 32;
    if (high != (int16_t)high) {
        high = (high >> 31) ^ 0x7fff;
        SET_QC();
    }
    /* Shift as unsigned: high may be negative, and left-shifting a
       negative signed value is undefined.  */
    return (uint16_t)low | ((uint32_t)high << 16);
}
981

  
982
/* Narrow an unsigned 64-bit value to 32 bits.  Values above 0xffffffff
   become 0xffffffff and SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
{
    if (x <= 0xffffffffu) {
        return x;
    }
    SET_QC();
    return 0xffffffffu;
}
990

  
991
/* Narrow a signed 64-bit value to 32 bits.  Values that do not fit in
   int32_t become 0x7fffffff (positive) or 0x80000000 (negative) and
   SET_QC() is invoked.  */
uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x)
{
    if ((int64_t)x != (int32_t)x) {
        SET_QC();
        /* The shift must be arithmetic so the sign yields 0 or -1;
           XOR then selects 0x7fffffff or 0x80000000.  */
        return ((int64_t)x >> 63) ^ 0x7fffffff;
    }
    return x;
}
999

  
1000
uint64_t HELPER(neon_widen_u8)(uint32_t x)
1001
{
1002
    uint64_t tmp;
1003
    uint64_t ret;
1004
    ret = (uint8_t)x;
1005
    tmp = (uint8_t)(x >> 8);
1006
    ret |= tmp << 16;
1007
    tmp = (uint8_t)(x >> 16);
1008
    ret |= tmp << 32;
1009
    tmp = (uint8_t)(x >> 24);
1010
    ret |= tmp << 48;
1011
    return ret;
1012
}
1013

  
1014
uint64_t HELPER(neon_widen_s8)(uint32_t x)
1015
{
1016
    uint64_t tmp;
1017
    uint64_t ret;
1018
    ret = (uint16_t)(int8_t)x;
1019
    tmp = (uint16_t)(int8_t)(x >> 8);
1020
    ret |= tmp << 16;
1021
    tmp = (uint16_t)(int8_t)(x >> 16);
1022
    ret |= tmp << 32;
1023
    tmp = (uint16_t)(int8_t)(x >> 24);
1024
    ret |= tmp << 48;
1025
    return ret;
1026
}
1027

  
1028
uint64_t HELPER(neon_widen_u16)(uint32_t x)
1029
{
1030
    uint64_t high = (uint16_t)(x >> 16);
1031
    return ((uint16_t)x) | (high << 32);
1032
}
1033

  
1034
uint64_t HELPER(neon_widen_s16)(uint32_t x)
1035
{
1036
    uint64_t high = (int16_t)(x >> 16);
1037
    return ((uint32_t)(int16_t)x) | (high << 32);
1038
}
1039

  
1040
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
1041
{
1042
    uint64_t mask;
1043
    mask = (a ^ b) & 0x8000800080008000ull;
1044
    a &= ~0x8000800080008000ull;
1045
    b &= ~0x8000800080008000ull;
1046
    return (a + b) ^ mask;
1047
}
1048

  
1049
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
1050
{
1051
    uint64_t mask;
1052
    mask = (a ^ b) & 0x8000000080000000ull;
1053
    a &= ~0x8000000080000000ull;
1054
    b &= ~0x8000000080000000ull;
1055
    return (a + b) ^ mask;
1056
}
1057

  
1058
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
1059
{
1060
    uint64_t tmp;
1061
    uint64_t tmp2;
1062

  
1063
    tmp = a & 0x0000ffff0000ffffull;
1064
    tmp += (a >> 16) & 0x0000ffff0000ffffull;
1065
    tmp2 = b & 0xffff0000ffff0000ull;
1066
    tmp2 += (b << 16) & 0xffff0000ffff0000ull;
1067
    return    ( tmp         & 0xffff)
1068
            | ((tmp  >> 16) & 0xffff0000ull)
1069
            | ((tmp2 << 16) & 0xffff00000000ull)
1070
            | ( tmp2        & 0xffff000000000000ull);
1071
}
1072

  
1073
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
1074
{
1075
    uint32_t low = a + (a >> 32);
1076
    uint32_t high = b + (b >> 32);
1077
    return low + ((uint64_t)high << 32);
1078
}
1079

  
1080
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
1081
{
1082
    uint64_t mask;
1083
    mask = (a ^ ~b) & 0x8000800080008000ull;
1084
    a |= 0x8000800080008000ull;
1085
    b &= ~0x8000800080008000ull;
1086
    return (a - b) ^ mask;
1087
}
1088

  
1089
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
1090
{
1091
    uint64_t mask;
1092
    mask = (a ^ ~b) & 0x8000000080000000ull;
1093
    a |= 0x8000000080000000ull;
1094
    b &= ~0x8000000080000000ull;
1095
    return (a - b) ^ mask;
1096
}
1097

  
1098
/* Signed saturating add of two packed 32-bit lanes.  A lane overflows
   when both operands have the same sign and the sum's sign differs; the
   lane then saturates toward the sign of the operands and SET_QC() is
   invoked.  */
uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
{
    uint32_t lo_a = a;
    uint32_t lo_b = b;
    uint32_t hi_a = a >> 32;
    uint32_t hi_b = b >> 32;
    uint32_t low = lo_a + lo_b;
    uint32_t high = hi_a + hi_b;

    if (((low ^ lo_a) & SIGNBIT) && !((lo_a ^ lo_b) & SIGNBIT)) {
        SET_QC();
        low = ((int32_t)lo_a >> 31) ^ ~SIGNBIT;
    }
    if (((high ^ hi_a) & SIGNBIT) && !((hi_a ^ hi_b) & SIGNBIT)) {
        SET_QC();
        high = ((int32_t)hi_a >> 31) ^ ~SIGNBIT;
    }
    return ((uint64_t)high << 32) | low;
}
1119

  
1120
/* Signed saturating add of two 64-bit values.  Overflow occurs when both
   operands have the same sign and the sum's sign differs; the result
   then saturates toward the sign of the operands and SET_QC() is
   invoked.  */
uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;
    int sign_flipped = ((sum ^ a) & SIGNBIT64) != 0;
    int same_sign = ((a ^ b) & SIGNBIT64) == 0;

    if (sign_flipped && same_sign) {
        SET_QC();
        sum = ((int64_t)a >> 63) ^ ~SIGNBIT64;
    }
    return sum;
}
1131

  
1132
/* Store in dest the absolute difference of x and y after converting
   both to the given type.  */
#define DO_ABD(dest, x, y, type) do { \
    type tmp_x = x; \
    type tmp_y = y; \
    if (tmp_x > tmp_y) { \
        dest = tmp_x - tmp_y; \
    } else { \
        dest = tmp_y - tmp_x; \
    } \
    } while (0)
1137

  
1138
uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
1139
{
1140
    uint64_t tmp;
1141
    uint64_t result;
1142
    DO_ABD(result, a, b, uint8_t);
1143
    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
1144
    result |= tmp << 16;
1145
    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
1146
    result |= tmp << 32;
1147
    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
1148
    result |= tmp << 48;
1149
    return result;
1150
}
1151

  
1152
uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
1153
{
1154
    uint64_t tmp;
1155
    uint64_t result;
1156
    DO_ABD(result, a, b, int8_t);
1157
    DO_ABD(tmp, a >> 8, b >> 8, int8_t);
1158
    result |= tmp << 16;
1159
    DO_ABD(tmp, a >> 16, b >> 16, int8_t);
1160
    result |= tmp << 32;
1161
    DO_ABD(tmp, a >> 24, b >> 24, int8_t);
1162
    result |= tmp << 48;
1163
    return result;
1164
}
1165

  
1166
uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
1167
{
1168
    uint64_t tmp;
1169
    uint64_t result;
1170
    DO_ABD(result, a, b, uint16_t);
1171
    DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
1172
    return result | (tmp << 32);
1173
}
1174

  
1175
uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
1176
{
1177
    uint64_t tmp;
1178
    uint64_t result;
1179
    DO_ABD(result, a, b, int16_t);
1180
    DO_ABD(tmp, a >> 16, b >> 16, int16_t);
1181
    return result | (tmp << 32);
1182
}
1183

  
1184
uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
1185
{
1186
    uint64_t result;
1187
    DO_ABD(result, a, b, uint32_t);
1188
    return result;
1189
}
1190

  
1191
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
1192
{
1193
    uint64_t result;
1194
    DO_ABD(result, a, b, int32_t);
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff