Revision ad69471c target-arm/translate.c
b/target-arm/translate.c | ||
---|---|---|
77 | 77 |
extern int loglevel; |
78 | 78 |
|
79 | 79 |
static TCGv cpu_env; |
80 |
/* We reuse the same 64-bit temporaries for efficiency. */ |
|
81 |
static TCGv cpu_V0, cpu_V1; |
|
82 |
|
|
80 | 83 |
/* FIXME: These should be removed. */ |
81 | 84 |
static TCGv cpu_T[2]; |
82 | 85 |
static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d; |
... | ... | |
469 | 472 |
} |
470 | 473 |
|
471 | 474 |
/* FIXME: Implement this natively. */ |
475 |
#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1) |
|
476 |
|
|
477 |
/* FIXME: Implement this natively. */ |
|
472 | 478 |
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i) |
473 | 479 |
{ |
474 | 480 |
TCGv tmp; |
... | ... | |
1166 | 1172 |
return vfp_reg_offset(0, sreg); |
1167 | 1173 |
} |
1168 | 1174 |
|
1169 |
#define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n)) |
|
1170 |
#define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n)) |
|
1175 |
/* FIXME: Remove these. */ |
|
1176 |
#define neon_T0 cpu_T[0] |
|
1177 |
#define neon_T1 cpu_T[1] |
|
1178 |
#define NEON_GET_REG(T, reg, n) \ |
|
1179 |
tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) |
|
1180 |
#define NEON_SET_REG(T, reg, n) \ |
|
1181 |
tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) |
|
1171 | 1182 |
|
1172 | 1183 |
static TCGv neon_load_reg(int reg, int pass) |
1173 | 1184 |
{ |
... | ... | |
1182 | 1193 |
dead_tmp(var); |
1183 | 1194 |
} |
1184 | 1195 |
|
1196 |
/* Emit a 64-bit load into VAR from the CPU state slot of register REG.
   The slot is located with vfp_reg_offset(1, reg); the leading 1
   presumably selects the double-precision view -- confirm against
   vfp_reg_offset.  */
static inline void neon_load_reg64(TCGv var, int reg)
{
    tcg_gen_ld_i64(var,
                   cpu_env,
                   vfp_reg_offset(1, reg));
}
|
1200 |
|
|
1201 |
/* Emit a 64-bit store of VAR into the CPU state slot of register REG.
   Mirror image of neon_load_reg64: same offset computation, store
   instead of load.  VAR is not released here.  */
static inline void neon_store_reg64(TCGv var, int reg)
{
    tcg_gen_st_i64(var,
                   cpu_env,
                   vfp_reg_offset(1, reg));
}
|
1205 |
|
|
1185 | 1206 |
#define tcg_gen_ld_f32 tcg_gen_ld_i32 |
1186 | 1207 |
#define tcg_gen_ld_f64 tcg_gen_ld_i64 |
1187 | 1208 |
#define tcg_gen_st_f32 tcg_gen_st_i32 |
... | ... | |
2418 | 2439 |
return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0); |
2419 | 2440 |
} |
2420 | 2441 |
|
2442 |
/* Replicate one byte of VAR into all four byte lanes of VAR (in place).
   SHIFT is the bit offset of the source byte within VAR; zero means the
   low byte and skips the initial right shift.  */
static void gen_neon_dup_u8(TCGv var, int shift)
{
    TCGv scratch = new_tmp();

    /* Bring the wanted byte down to bits [7:0] and drop everything else.  */
    if (shift != 0) {
        tcg_gen_shri_i32(var, var, shift);
    }
    tcg_gen_andi_i32(var, var, 0xff);

    /* 0x000000ab -> 0x0000abab */
    tcg_gen_shli_i32(scratch, var, 8);
    tcg_gen_or_i32(var, var, scratch);

    /* 0x0000abab -> 0xabababab */
    tcg_gen_shli_i32(scratch, var, 16);
    tcg_gen_or_i32(var, var, scratch);

    dead_tmp(scratch);
}
|
2454 |
|
|
2455 |
/* Copy the low halfword of VAR into both halfwords of VAR (in place):
   0x....abcd -> 0xabcdabcd.  */
static void gen_neon_dup_low16(TCGv var)
{
    TCGv high = new_tmp();

    /* Keep only bits [15:0].  */
    tcg_gen_andi_i32(var, var, 0xffff);
    /* Build the upper copy in a scratch temporary and merge it in.  */
    tcg_gen_shli_i32(high, var, 16);
    tcg_gen_or_i32(var, var, high);

    dead_tmp(high);
}
|
2463 |
|
|
2464 |
/* Copy the high halfword of VAR into both halfwords of VAR (in place):
   0xabcd.... -> 0xabcdabcd.  */
static void gen_neon_dup_high16(TCGv var)
{
    TCGv low = new_tmp();

    /* Keep only bits [31:16].  */
    tcg_gen_andi_i32(var, var, 0xffff0000);
    /* Build the lower copy in a scratch temporary and merge it in.  */
    tcg_gen_shri_i32(low, var, 16);
    tcg_gen_or_i32(var, var, low);

    dead_tmp(low);
}
|
2472 |
|
|
2421 | 2473 |
/* Disassemble a VFP instruction. Returns nonzero if an error occurred |
2422 | 2474 |
(ie. an undefined instruction). */ |
2423 | 2475 |
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) |
... | ... | |
2425 | 2477 |
uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask; |
2426 | 2478 |
int dp, veclen; |
2427 | 2479 |
TCGv tmp; |
2480 |
TCGv tmp2; |
|
2428 | 2481 |
|
2429 | 2482 |
if (!arm_feature(env, ARM_FEATURE_VFP)) |
2430 | 2483 |
return 1; |
... | ... | |
2468 | 2521 |
} |
2469 | 2522 |
if (insn & ARM_CP_RW_BIT) { |
2470 | 2523 |
/* vfp->arm */ |
2524 |
tmp = neon_load_reg(rn, pass); |
|
2471 | 2525 |
switch (size) { |
2472 | 2526 |
case 0: |
2473 |
NEON_GET_REG(T1, rn, pass); |
|
2474 | 2527 |
if (offset) |
2475 |
gen_op_shrl_T1_im(offset);
|
|
2528 |
tcg_gen_shri_i32(tmp, tmp, offset);
|
|
2476 | 2529 |
if (insn & (1 << 23)) |
2477 |
gen_uxtb(cpu_T[1]);
|
|
2530 |
gen_uxtb(tmp);
|
|
2478 | 2531 |
else |
2479 |
gen_sxtb(cpu_T[1]);
|
|
2532 |
gen_sxtb(tmp);
|
|
2480 | 2533 |
break; |
2481 | 2534 |
case 1: |
2482 |
NEON_GET_REG(T1, rn, pass); |
|
2483 | 2535 |
if (insn & (1 << 23)) { |
2484 | 2536 |
if (offset) { |
2485 |
gen_op_shrl_T1_im(16);
|
|
2537 |
tcg_gen_shri_i32(tmp, tmp, 16);
|
|
2486 | 2538 |
} else { |
2487 |
gen_uxth(cpu_T[1]);
|
|
2539 |
gen_uxth(tmp);
|
|
2488 | 2540 |
} |
2489 | 2541 |
} else { |
2490 | 2542 |
if (offset) { |
2491 |
gen_op_sarl_T1_im(16);
|
|
2543 |
tcg_gen_sari_i32(tmp, tmp, 16);
|
|
2492 | 2544 |
} else { |
2493 |
gen_sxth(cpu_T[1]);
|
|
2545 |
gen_sxth(tmp);
|
|
2494 | 2546 |
} |
2495 | 2547 |
} |
2496 | 2548 |
break; |
2497 | 2549 |
case 2: |
2498 |
NEON_GET_REG(T1, rn, pass); |
|
2499 | 2550 |
break; |
2500 | 2551 |
} |
2501 |
gen_movl_reg_T1(s, rd);
|
|
2552 |
store_reg(s, rd, tmp);
|
|
2502 | 2553 |
} else { |
2503 | 2554 |
/* arm->vfp */ |
2504 |
gen_movl_T0_reg(s, rd);
|
|
2555 |
tmp = load_reg(s, rd);
|
|
2505 | 2556 |
if (insn & (1 << 23)) { |
2506 | 2557 |
/* VDUP */ |
2507 | 2558 |
if (size == 0) { |
2508 |
gen_op_neon_dup_u8(0);
|
|
2559 |
gen_neon_dup_u8(tmp, 0);
|
|
2509 | 2560 |
} else if (size == 1) { |
2510 |
gen_op_neon_dup_low16();
|
|
2561 |
gen_neon_dup_low16(tmp);
|
|
2511 | 2562 |
} |
2512 |
NEON_SET_REG(T0, rn, 0); |
|
2513 |
NEON_SET_REG(T0, rn, 1); |
|
2563 |
tmp2 = new_tmp(); |
|
2564 |
tcg_gen_mov_i32(tmp2, tmp); |
|
2565 |
neon_store_reg(rn, 0, tmp2); |
|
2566 |
neon_store_reg(rn, 0, tmp); |
|
2514 | 2567 |
} else { |
2515 | 2568 |
/* VMOV */ |
2516 | 2569 |
switch (size) { |
2517 | 2570 |
case 0: |
2518 |
tmp = neon_load_reg(rn, pass); |
|
2519 |
gen_bfi(tmp, tmp, cpu_T[0], offset, 0xff);
|
|
2520 |
neon_store_reg(rn, pass, tmp);
|
|
2571 |
tmp2 = neon_load_reg(rn, pass);
|
|
2572 |
gen_bfi(tmp, tmp2, tmp, offset, 0xff);
|
|
2573 |
dead_tmp(tmp2);
|
|
2521 | 2574 |
break; |
2522 | 2575 |
case 1: |
2523 |
tmp = neon_load_reg(rn, pass); |
|
2524 |
gen_bfi(tmp, tmp, cpu_T[0], offset, 0xffff);
|
|
2525 |
neon_store_reg(rn, pass, tmp);
|
|
2576 |
tmp2 = neon_load_reg(rn, pass);
|
|
2577 |
gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
|
|
2578 |
dead_tmp(tmp2);
|
|
2526 | 2579 |
break; |
2527 | 2580 |
case 2: |
2528 |
NEON_SET_REG(T0, rn, pass); |
|
2529 | 2581 |
break; |
2530 | 2582 |
} |
2583 |
neon_store_reg(rn, pass, tmp); |
|
2531 | 2584 |
} |
2532 | 2585 |
} |
2533 | 2586 |
} else { /* !dp */ |
... | ... | |
3210 | 3263 |
} |
3211 | 3264 |
} |
3212 | 3265 |
|
3213 |
/* Neon shift by constant. The actual ops are the same as used for variable |
|
3214 |
shifts. [OP][U][SIZE] */ |
|
3215 |
static GenOpFunc *gen_neon_shift_im[8][2][4] = { |
|
3216 |
{ /* 0 */ /* VSHR */ |
|
3217 |
{ |
|
3218 |
gen_op_neon_shl_u8, |
|
3219 |
gen_op_neon_shl_u16, |
|
3220 |
gen_op_neon_shl_u32, |
|
3221 |
gen_op_neon_shl_u64 |
|
3222 |
}, { |
|
3223 |
gen_op_neon_shl_s8, |
|
3224 |
gen_op_neon_shl_s16, |
|
3225 |
gen_op_neon_shl_s32, |
|
3226 |
gen_op_neon_shl_s64 |
|
3227 |
} |
|
3228 |
}, { /* 1 */ /* VSRA */ |
|
3229 |
{ |
|
3230 |
gen_op_neon_shl_u8, |
|
3231 |
gen_op_neon_shl_u16, |
|
3232 |
gen_op_neon_shl_u32, |
|
3233 |
gen_op_neon_shl_u64 |
|
3234 |
}, { |
|
3235 |
gen_op_neon_shl_s8, |
|
3236 |
gen_op_neon_shl_s16, |
|
3237 |
gen_op_neon_shl_s32, |
|
3238 |
gen_op_neon_shl_s64 |
|
3239 |
} |
|
3240 |
}, { /* 2 */ /* VRSHR */ |
|
3241 |
{ |
|
3242 |
gen_op_neon_rshl_u8, |
|
3243 |
gen_op_neon_rshl_u16, |
|
3244 |
gen_op_neon_rshl_u32, |
|
3245 |
gen_op_neon_rshl_u64 |
|
3246 |
}, { |
|
3247 |
gen_op_neon_rshl_s8, |
|
3248 |
gen_op_neon_rshl_s16, |
|
3249 |
gen_op_neon_rshl_s32, |
|
3250 |
gen_op_neon_rshl_s64 |
|
3251 |
} |
|
3252 |
}, { /* 3 */ /* VRSRA */ |
|
3253 |
{ |
|
3254 |
gen_op_neon_rshl_u8, |
|
3255 |
gen_op_neon_rshl_u16, |
|
3256 |
gen_op_neon_rshl_u32, |
|
3257 |
gen_op_neon_rshl_u64 |
|
3258 |
}, { |
|
3259 |
gen_op_neon_rshl_s8, |
|
3260 |
gen_op_neon_rshl_s16, |
|
3261 |
gen_op_neon_rshl_s32, |
|
3262 |
gen_op_neon_rshl_s64 |
|
3263 |
} |
|
3264 |
}, { /* 4 */ |
|
3265 |
{ |
|
3266 |
NULL, NULL, NULL, NULL |
|
3267 |
}, { /* VSRI */ |
|
3268 |
gen_op_neon_shl_u8, |
|
3269 |
gen_op_neon_shl_u16, |
|
3270 |
gen_op_neon_shl_u32, |
|
3271 |
gen_op_neon_shl_u64, |
|
3272 |
} |
|
3273 |
}, { /* 5 */ |
|
3274 |
{ /* VSHL */ |
|
3275 |
gen_op_neon_shl_u8, |
|
3276 |
gen_op_neon_shl_u16, |
|
3277 |
gen_op_neon_shl_u32, |
|
3278 |
gen_op_neon_shl_u64, |
|
3279 |
}, { /* VSLI */ |
|
3280 |
gen_op_neon_shl_u8, |
|
3281 |
gen_op_neon_shl_u16, |
|
3282 |
gen_op_neon_shl_u32, |
|
3283 |
gen_op_neon_shl_u64, |
|
3284 |
} |
|
3285 |
}, { /* 6 */ /* VQSHL */ |
|
3286 |
{ |
|
3287 |
gen_op_neon_qshl_u8, |
|
3288 |
gen_op_neon_qshl_u16, |
|
3289 |
gen_op_neon_qshl_u32, |
|
3290 |
gen_op_neon_qshl_u64 |
|
3291 |
}, { |
|
3292 |
gen_op_neon_qshl_s8, |
|
3293 |
gen_op_neon_qshl_s16, |
|
3294 |
gen_op_neon_qshl_s32, |
|
3295 |
gen_op_neon_qshl_s64 |
|
3296 |
} |
|
3297 |
}, { /* 7 */ /* VQSHLU */ |
|
3298 |
{ |
|
3299 |
gen_op_neon_qshl_u8, |
|
3300 |
gen_op_neon_qshl_u16, |
|
3301 |
gen_op_neon_qshl_u32, |
|
3302 |
gen_op_neon_qshl_u64 |
|
3303 |
}, { |
|
3304 |
gen_op_neon_qshl_u8, |
|
3305 |
gen_op_neon_qshl_u16, |
|
3306 |
gen_op_neon_qshl_u32, |
|
3307 |
gen_op_neon_qshl_u64 |
|
3308 |
} |
|
3309 |
} |
|
3310 |
}; |
|
3266 |
/* These macros help make the code more readable when migrating from the |
|
3267 |
old dyngen helpers. They should probably be removed when |
|
3268 |
T0/T1 are removed. */ |
|
3269 |
#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1] |
|
3270 |
#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1] |
|
3311 | 3271 |
|
3312 |
/* [R][U][size - 1] */ |
|
3313 |
static GenOpFunc *gen_neon_shift_im_narrow[2][2][3] = { |
|
3314 |
{ |
|
3315 |
{ |
|
3316 |
gen_op_neon_shl_u16, |
|
3317 |
gen_op_neon_shl_u32, |
|
3318 |
gen_op_neon_shl_u64 |
|
3319 |
}, { |
|
3320 |
gen_op_neon_shl_s16, |
|
3321 |
gen_op_neon_shl_s32, |
|
3322 |
gen_op_neon_shl_s64 |
|
3323 |
} |
|
3324 |
}, { |
|
3325 |
{ |
|
3326 |
gen_op_neon_rshl_u16, |
|
3327 |
gen_op_neon_rshl_u32, |
|
3328 |
gen_op_neon_rshl_u64 |
|
3329 |
}, { |
|
3330 |
gen_op_neon_rshl_s16, |
|
3331 |
gen_op_neon_rshl_s32, |
|
3332 |
gen_op_neon_rshl_s64 |
|
3333 |
} |
|
3334 |
} |
|
3335 |
}; |
|
3336 |
|
|
3337 |
static inline void |
|
3338 |
gen_op_neon_narrow_u32 () |
|
3339 |
{ |
|
3340 |
/* No-op. */ |
|
3341 |
} |
|
3342 |
|
|
3343 |
static GenOpFunc *gen_neon_narrow[3] = { |
|
3344 |
gen_op_neon_narrow_u8, |
|
3345 |
gen_op_neon_narrow_u16, |
|
3346 |
gen_op_neon_narrow_u32 |
|
3347 |
}; |
|
3348 |
|
|
3349 |
static GenOpFunc *gen_neon_narrow_satu[3] = { |
|
3350 |
gen_op_neon_narrow_sat_u8, |
|
3351 |
gen_op_neon_narrow_sat_u16, |
|
3352 |
gen_op_neon_narrow_sat_u32 |
|
3353 |
}; |
|
3354 |
|
|
3355 |
static GenOpFunc *gen_neon_narrow_sats[3] = { |
|
3356 |
gen_op_neon_narrow_sat_s8, |
|
3357 |
gen_op_neon_narrow_sat_s16, |
|
3358 |
gen_op_neon_narrow_sat_s32 |
|
3359 |
}; |
|
3272 |
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1 |
|
3360 | 3273 |
|
3361 | 3274 |
static inline int gen_neon_add(int size) |
3362 | 3275 |
{ |
3363 | 3276 |
switch (size) { |
3364 |
case 0: gen_op_neon_add_u8(); break;
|
|
3365 |
case 1: gen_op_neon_add_u16(); break;
|
|
3277 |
case 0: gen_helper_neon_add_u8(CPU_T001); break;
|
|
3278 |
case 1: gen_helper_neon_add_u16(CPU_T001); break;
|
|
3366 | 3279 |
case 2: gen_op_addl_T0_T1(); break; |
3367 | 3280 |
default: return 1; |
3368 | 3281 |
} |
3369 | 3282 |
return 0; |
3370 | 3283 |
} |
3371 | 3284 |
|
3372 |
/* 32-bit pairwise ops end up the same as the elementsise versions. */ |
|
3373 |
#define gen_op_neon_pmax_s32 gen_op_neon_max_s32 |
|
3374 |
#define gen_op_neon_pmax_u32 gen_op_neon_max_u32 |
|
3375 |
#define gen_op_neon_pmin_s32 gen_op_neon_min_s32 |
|
3376 |
#define gen_op_neon_pmin_u32 gen_op_neon_min_u32 |
|
3285 |
static inline void gen_neon_rsb(int size) |
|
3286 |
{ |
|
3287 |
switch (size) { |
|
3288 |
case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break; |
|
3289 |
case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break; |
|
3290 |
case 2: gen_op_rsbl_T0_T1(); break; |
|
3291 |
default: return; |
|
3292 |
} |
|
3293 |
} |
|
3294 |
|
|
3295 |
/* 32-bit pairwise ops end up the same as the elementwise versions. */ |
|
3296 |
#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32 |
|
3297 |
#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32 |
|
3298 |
#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 |
|
3299 |
#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 |
|
3300 |
|
|
3301 |
/* FIXME: This is wrong. They set the wrong overflow bit. */ |
|
3302 |
#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) |
|
3303 |
#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) |
|
3304 |
#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) |
|
3305 |
#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) |
|
3306 |
|
|
3307 |
/* Emit T0 = helper(env, T0, T1) for the integer NEON operation NAME,
   choosing the helper variant from the variables `size' and `u', which
   must be in scope at the point of use.  The selector is (size << 1) | u:
   even values pick the signed helper, odd values the unsigned one, for
   8-, 16- and 32-bit elements.  Any other selector executes `return 1'
   in the *enclosing* function, so this may only be used inside a
   function returning int.
   The argument list is written out in full at each call because some
   gen_helper_neon_* names are themselves function-like macros taking
   four parameters.  */
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, \
                                    cpu_T[0], cpu_T[1]); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, \
                                    cpu_T[0], cpu_T[1]); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, \
                                     cpu_T[0], cpu_T[1]); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, \
                                     cpu_T[0], cpu_T[1]); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, \
                                     cpu_T[0], cpu_T[1]); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, \
                                     cpu_T[0], cpu_T[1]); \
        break; \
    default: return 1; \
    }} while (0)
|
3377 | 3329 |
|
3378 | 3330 |
#define GEN_NEON_INTEGER_OP(name) do { \ |
3379 | 3331 |
switch ((size << 1) | u) { \ |
3380 |
case 0: gen_op_neon_##name##_s8(); break; \ |
|
3381 |
case 1: gen_op_neon_##name##_u8(); break; \ |
|
3382 |
case 2: gen_op_neon_##name##_s16(); break; \ |
|
3383 |
case 3: gen_op_neon_##name##_u16(); break; \ |
|
3384 |
case 4: gen_op_neon_##name##_s32(); break; \ |
|
3385 |
case 5: gen_op_neon_##name##_u32(); break; \ |
|
3332 |
case 0: \ |
|
3333 |
gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3334 |
break; \ |
|
3335 |
case 1: \ |
|
3336 |
gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3337 |
break; \ |
|
3338 |
case 2: \ |
|
3339 |
gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3340 |
break; \ |
|
3341 |
case 3: \ |
|
3342 |
gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3343 |
break; \ |
|
3344 |
case 4: \ |
|
3345 |
gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3346 |
break; \ |
|
3347 |
case 5: \ |
|
3348 |
gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \ |
|
3349 |
break; \ |
|
3386 | 3350 |
default: return 1; \ |
3387 | 3351 |
}} while (0) |
3388 | 3352 |
|
... | ... | |
3392 | 3356 |
uint32_t offset; |
3393 | 3357 |
|
3394 | 3358 |
offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
3395 |
gen_op_neon_setreg_T0(offset);
|
|
3359 |
tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
|
|
3396 | 3360 |
} |
3397 | 3361 |
|
3398 | 3362 |
static inline void |
... | ... | |
3401 | 3365 |
uint32_t offset; |
3402 | 3366 |
|
3403 | 3367 |
offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
3404 |
gen_op_neon_setreg_T1(offset);
|
|
3368 |
tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
|
|
3405 | 3369 |
} |
3406 | 3370 |
|
3407 | 3371 |
static inline void |
... | ... | |
3410 | 3374 |
uint32_t offset; |
3411 | 3375 |
|
3412 | 3376 |
offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
3413 |
gen_op_neon_getreg_T0(offset);
|
|
3377 |
tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
|
|
3414 | 3378 |
} |
3415 | 3379 |
|
3416 | 3380 |
static inline void |
... | ... | |
3419 | 3383 |
uint32_t offset; |
3420 | 3384 |
|
3421 | 3385 |
offset = offsetof(CPUARMState, vfp.scratch[scratch]); |
3422 |
gen_op_neon_getreg_T1(offset); |
|
3423 |
} |
|
3424 |
|
|
3425 |
static inline void gen_op_neon_widen_u32(void) |
|
3426 |
{ |
|
3427 |
gen_op_movl_T1_im(0); |
|
3386 |
tcg_gen_ld_i32(cpu_T[1], cpu_env, offset); |
|
3428 | 3387 |
} |
3429 | 3388 |
|
3430 | 3389 |
static inline void gen_neon_get_scalar(int size, int reg) |
... | ... | |
3434 | 3393 |
} else { |
3435 | 3394 |
NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1); |
3436 | 3395 |
if (reg & 1) |
3437 |
gen_op_neon_dup_low16();
|
|
3396 |
gen_neon_dup_low16(cpu_T[0]);
|
|
3438 | 3397 |
else |
3439 |
gen_op_neon_dup_high16();
|
|
3398 |
gen_neon_dup_high16(cpu_T[0]);
|
|
3440 | 3399 |
} |
3441 | 3400 |
} |
3442 | 3401 |
|
... | ... | |
3448 | 3407 |
NEON_GET_REG(T0, reg, n); |
3449 | 3408 |
NEON_GET_REG(T0, reg, n + n); |
3450 | 3409 |
switch (size) { |
3451 |
case 0: gen_op_neon_unzip_u8(); break;
|
|
3452 |
case 1: gen_op_neon_zip_u16(); break; /* zip and unzip are the same. */
|
|
3410 |
case 0: gen_helper_neon_unzip_u8(); break;
|
|
3411 |
case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */
|
|
3453 | 3412 |
case 2: /* no-op */; break; |
3454 | 3413 |
default: abort(); |
3455 | 3414 |
} |
... | ... | |
3522 | 3481 |
if (size == 2) { |
3523 | 3482 |
if (load) { |
3524 | 3483 |
tmp = gen_ld32(cpu_T[1], IS_USER(s)); |
3525 |
tcg_gen_mov_i32(cpu_T[0], tmp); |
|
3526 |
dead_tmp(tmp); |
|
3527 |
NEON_SET_REG(T0, rd, pass); |
|
3484 |
neon_store_reg(rd, pass, tmp); |
|
3528 | 3485 |
} else { |
3529 |
NEON_GET_REG(T0, rd, pass); |
|
3530 |
tmp = new_tmp(); |
|
3531 |
tcg_gen_mov_i32(tmp, cpu_T[0]); |
|
3486 |
tmp = neon_load_reg(rd, pass); |
|
3532 | 3487 |
gen_st32(tmp, cpu_T[1], IS_USER(s)); |
3533 | 3488 |
} |
3534 | 3489 |
gen_op_addl_T1_im(stride); |
... | ... | |
3596 | 3551 |
switch (size) { |
3597 | 3552 |
case 0: |
3598 | 3553 |
tmp = gen_ld8u(cpu_T[1], IS_USER(s)); |
3599 |
tcg_gen_mov_i32(cpu_T[0], tmp); |
|
3600 |
dead_tmp(tmp); |
|
3601 |
gen_op_neon_dup_u8(0); |
|
3554 |
gen_neon_dup_u8(tmp, 0); |
|
3602 | 3555 |
break; |
3603 | 3556 |
case 1: |
3604 | 3557 |
tmp = gen_ld16u(cpu_T[1], IS_USER(s)); |
3605 |
tcg_gen_mov_i32(cpu_T[0], tmp); |
|
3606 |
dead_tmp(tmp); |
|
3607 |
gen_op_neon_dup_low16(); |
|
3558 |
gen_neon_dup_low16(tmp); |
|
3608 | 3559 |
break; |
3609 | 3560 |
case 2: |
3610 | 3561 |
tmp = gen_ld32(cpu_T[0], IS_USER(s)); |
3611 |
tcg_gen_mov_i32(cpu_T[0], tmp); |
|
3612 |
dead_tmp(tmp); |
|
3613 | 3562 |
break; |
3614 | 3563 |
case 3: |
3615 | 3564 |
return 1; |
3616 | 3565 |
} |
3617 | 3566 |
gen_op_addl_T1_im(1 << size); |
3618 |
NEON_SET_REG(T0, rd, 0); |
|
3619 |
NEON_SET_REG(T0, rd, 1); |
|
3567 |
tmp2 = new_tmp(); |
|
3568 |
tcg_gen_mov_i32(tmp2, tmp); |
|
3569 |
neon_store_reg(rd, 0, tmp2); |
|
3570 |
neon_store_reg(rd, 0, tmp); |
|
3620 | 3571 |
rd += stride; |
3621 | 3572 |
} |
3622 | 3573 |
stride = (1 << size) * nregs; |
... | ... | |
3707 | 3658 |
tcg_gen_or_i32(dest, t, f); |
3708 | 3659 |
} |
3709 | 3660 |
|
3661 |
/* Emit a plain (non-saturating) narrowing of the 64-bit value SRC into
   the 32-bit value DEST.  SIZE selects the result element width:
   0 and 1 go through the u8/u16 narrowing helpers, 2 is a simple 64->32
   truncation.  Any other SIZE is a translator bug and aborts.  */
static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
{
    if (size == 0) {
        gen_helper_neon_narrow_u8(dest, src);
    } else if (size == 1) {
        gen_helper_neon_narrow_u16(dest, src);
    } else if (size == 2) {
        tcg_gen_trunc_i64_i32(dest, src);
    } else {
        abort();
    }
}
|
3670 |
|
|
3671 |
/* Emit a signed saturating narrowing of SRC into DEST.  SIZE (0, 1 or 2)
   selects the s8/s16/s32 helper; any other value aborts.  The helpers
   also receive cpu_env (presumably to record saturation -- confirm in
   the helper implementation).  */
static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
{
    if (size == 0) {
        gen_helper_neon_narrow_sat_s8(dest, cpu_env, src);
    } else if (size == 1) {
        gen_helper_neon_narrow_sat_s16(dest, cpu_env, src);
    } else if (size == 2) {
        gen_helper_neon_narrow_sat_s32(dest, cpu_env, src);
    } else {
        abort();
    }
}
|
3680 |
|
|
3681 |
/* Emit an unsigned saturating narrowing of SRC into DEST.  SIZE (0, 1 or
   2) selects the u8/u16/u32 helper; any other value aborts.  The helpers
   also receive cpu_env (presumably to record saturation -- confirm in
   the helper implementation).  */
static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv src)
{
    if (size == 0) {
        gen_helper_neon_narrow_sat_u8(dest, cpu_env, src);
    } else if (size == 1) {
        gen_helper_neon_narrow_sat_u16(dest, cpu_env, src);
    } else if (size == 2) {
        gen_helper_neon_narrow_sat_u32(dest, cpu_env, src);
    } else {
        abort();
    }
}
|
3690 |
|
|
3691 |
/* Emit the shift step of a NEON shift-and-narrow: VAR = VAR shifted by
   SHIFT, performed in place by a variable-shift helper.

   SIZE  selects the element width of the helper: 1 -> 16-bit, 2 -> 32-bit.
         Any other value is a translator bug and aborts.
   Q     nonzero selects the rounding helpers (rshl), zero the plain
         ones (shl).
   U     nonzero selects the unsigned helpers, zero the signed ones.

   The non-rounding unsigned case previously called the rounding helpers
   (a copy of the Q && U branch), so unsigned non-rounding shifts were
   rounded.  It now uses the plain shl helpers, in line with the signed
   non-rounding case.  */
static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
                                         int q, int u)
{
    if (q) {
        /* Rounding shifts.  */
        if (u) {
            switch (size) {
            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
            default: abort();
            }
        } else {
            switch (size) {
            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
            default: abort();
            }
        }
    } else {
        /* Plain (truncating) shifts.  */
        if (u) {
            switch (size) {
            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
            default: abort();
            }
        } else {
            switch (size) {
            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
            default: abort();
            }
        }
    }
}
|
3724 |
|
|
3725 |
/* Emit a widening of the 32-bit value SRC into the 64-bit value DEST.
   SIZE is the source element width (0: 8-bit, 1: 16-bit, 2: 32-bit);
   U nonzero widens as unsigned (zero-extend), zero as signed.  Any other
   SIZE aborts.  SRC is released with dead_tmp() before returning, so the
   caller must not use it afterwards.  */
static inline void gen_neon_widen(TCGv dest, TCGv src, int size, int u)
{
    switch (size) {
    case 0:
        if (u) {
            gen_helper_neon_widen_u8(dest, src);
        } else {
            gen_helper_neon_widen_s8(dest, src);
        }
        break;
    case 1:
        if (u) {
            gen_helper_neon_widen_u16(dest, src);
        } else {
            gen_helper_neon_widen_s16(dest, src);
        }
        break;
    case 2:
        /* A single 32-bit element: plain zero/sign extension.  */
        if (u) {
            tcg_gen_extu_i32_i64(dest, src);
        } else {
            tcg_gen_ext_i32_i64(dest, src);
        }
        break;
    default:
        abort();
    }
    dead_tmp(src);
}
|
3744 |
|
|
3745 |
static inline void gen_neon_addl(int size) |
|
3746 |
{ |
|
3747 |
switch (size) { |
|
3748 |
case 0: gen_helper_neon_addl_u16(CPU_V001); break; |
|
3749 |
case 1: gen_helper_neon_addl_u32(CPU_V001); break; |
|
3750 |
case 2: tcg_gen_add_i64(CPU_V001); break; |
|
3751 |
default: abort(); |
|
3752 |
} |
|
3753 |
} |
|
3754 |
|
|
3755 |
static inline void gen_neon_subl(int size) |
|
3756 |
{ |
|
3757 |
switch (size) { |
|
3758 |
case 0: gen_helper_neon_subl_u16(CPU_V001); break; |
|
3759 |
case 1: gen_helper_neon_subl_u32(CPU_V001); break; |
|
3760 |
case 2: tcg_gen_sub_i64(CPU_V001); break; |
|
3761 |
default: abort(); |
|
3762 |
} |
|
3763 |
} |
|
3764 |
|
|
3765 |
/* Emit an in-place negate of the 64-bit value VAR.  SIZE (0, 1 or 2)
   selects the u16/u32/u64 negate helper; any other value aborts.  */
static inline void gen_neon_negl(TCGv var, int size)
{
    if (size == 0) {
        gen_helper_neon_negl_u16(var, var);
    } else if (size == 1) {
        gen_helper_neon_negl_u32(var, var);
    } else if (size == 2) {
        gen_helper_neon_negl_u64(var, var);
    } else {
        abort();
    }
}
|
3774 |
|
|
3775 |
/* Emit a signed saturating long add: OP0 = sat(OP0 + OP1).
   SIZE 1 uses the s32 helper and SIZE 2 the s64 helper; there is no
   variant for SIZE 0, which (like any other value) aborts.  The helpers
   also receive cpu_env.  */
static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
{
    if (size == 1) {
        gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1);
    } else if (size == 2) {
        gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1);
    } else {
        abort();
    }
}
|
3783 |
|
|
3784 |
/* Emit a widening multiply: DEST (64-bit) = A * B (32-bit inputs).
   SIZE is the element width (0: 8-bit, 1: 16-bit, 2: 32-bit) and U
   selects unsigned (nonzero) or signed (zero) multiplication.  An
   unsupported SIZE aborts.

   Ownership: for SIZE 0 and 1 this function releases A and B itself.
   For SIZE 2 they are handed to gen_mul{s,u}_i64_i32 and not released
   here -- presumably those routines consume them; confirm there.  */
static inline void gen_neon_mull(TCGv dest, TCGv a, TCGv b, int size, int u)
{
    TCGv product;
    int variant = (size << 1) | u;

    switch (variant) {
    case 0:
        gen_helper_neon_mull_s8(dest, a, b);
        break;
    case 1:
        gen_helper_neon_mull_u8(dest, a, b);
        break;
    case 2:
        gen_helper_neon_mull_s16(dest, a, b);
        break;
    case 3:
        gen_helper_neon_mull_u16(dest, a, b);
        break;
    case 4:
        /* Single 32x32->64 signed multiply.  */
        product = gen_muls_i64_i32(a, b);
        tcg_gen_mov_i64(dest, product);
        break;
    case 5:
        /* Single 32x32->64 unsigned multiply.  */
        product = gen_mulu_i64_i32(a, b);
        tcg_gen_mov_i64(dest, product);
        break;
    default:
        abort();
    }

    /* The lane-wise helpers leave their inputs alive; free them here.  */
    if (size < 2) {
        dead_tmp(b);
        dead_tmp(a);
    }
}
|
3808 |
|
|
3710 | 3809 |
/* Translate a NEON data processing instruction. Return nonzero if the |
3711 | 3810 |
instruction is invalid. |
3712 |
In general we process vectors in 32-bit chunks. This means we can reuse |
|
3713 |
some of the scalar ops, and hopefully the code generated for 32-bit |
|
3714 |
hosts won't be too awful. The downside is that the few 64-bit operations |
|
3715 |
(mainly shifts) get complicated. */ |
|
3811 |
We process data in a mixture of 32-bit and 64-bit chunks. |
|
3812 |
Mostly we use 32-bit chunks so we can use normal scalar instructions. */ |
|
3716 | 3813 |
|
3717 | 3814 |
static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) |
3718 | 3815 |
{ |
... | ... | |
3742 | 3839 |
if ((insn & (1 << 23)) == 0) { |
3743 | 3840 |
/* Three register same length. */ |
3744 | 3841 |
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); |
3745 |
if (size == 3 && (op == 1 || op == 5 || op == 16)) { |
|
3842 |
if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9 |
|
3843 |
|| op == 10 || op == 11 || op == 16)) { |
|
3844 |
/* 64-bit element instructions. */ |
|
3746 | 3845 |
for (pass = 0; pass < (q ? 2 : 1); pass++) { |
3747 |
NEON_GET_REG(T0, rm, pass * 2); |
|
3748 |
NEON_GET_REG(T1, rm, pass * 2 + 1); |
|
3749 |
gen_neon_movl_scratch_T0(0); |
|
3750 |
gen_neon_movl_scratch_T1(1); |
|
3751 |
NEON_GET_REG(T0, rn, pass * 2); |
|
3752 |
NEON_GET_REG(T1, rn, pass * 2 + 1); |
|
3846 |
neon_load_reg64(cpu_V0, rn + pass); |
|
3847 |
neon_load_reg64(cpu_V1, rm + pass); |
|
3753 | 3848 |
switch (op) { |
3754 | 3849 |
case 1: /* VQADD */ |
3755 | 3850 |
if (u) { |
3756 |
gen_op_neon_addl_saturate_u64();
|
|
3851 |
gen_helper_neon_add_saturate_u64(CPU_V001);
|
|
3757 | 3852 |
} else { |
3758 |
gen_op_neon_addl_saturate_s64();
|
|
3853 |
gen_helper_neon_add_saturate_s64(CPU_V001);
|
|
3759 | 3854 |
} |
3760 | 3855 |
break; |
3761 | 3856 |
case 5: /* VQSUB */ |
3762 | 3857 |
if (u) { |
3763 |
gen_op_neon_subl_saturate_u64();
|
|
3858 |
gen_helper_neon_sub_saturate_u64(CPU_V001);
|
|
3764 | 3859 |
} else { |
3765 |
gen_op_neon_subl_saturate_s64(); |
|
3860 |
gen_helper_neon_sub_saturate_s64(CPU_V001); |
|
3861 |
} |
|
3862 |
break; |
|
3863 |
case 8: /* VSHL */ |
|
3864 |
if (u) { |
|
3865 |
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); |
|
3866 |
} else { |
|
3867 |
gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); |
|
3868 |
} |
|
3869 |
break; |
|
3870 |
case 9: /* VQSHL */ |
|
3871 |
if (u) { |
|
3872 |
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, |
|
3873 |
cpu_V0, cpu_V0); |
|
3874 |
} else { |
|
3875 |
gen_helper_neon_qshl_s64(cpu_V1, cpu_env, |
|
3876 |
cpu_V1, cpu_V0); |
|
3877 |
} |
|
3878 |
break; |
|
3879 |
case 10: /* VRSHL */ |
|
3880 |
if (u) { |
|
3881 |
gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); |
|
3882 |
} else { |
|
3883 |
gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); |
|
3884 |
} |
|
3885 |
break; |
|
3886 |
case 11: /* VQRSHL */ |
|
3887 |
if (u) { |
|
3888 |
gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, |
|
3889 |
cpu_V1, cpu_V0); |
|
3890 |
} else { |
|
3891 |
gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, |
|
3892 |
cpu_V1, cpu_V0); |
|
3766 | 3893 |
} |
3767 | 3894 |
break; |
3768 | 3895 |
case 16: |
3769 | 3896 |
if (u) { |
3770 |
gen_op_neon_subl_u64();
|
|
3897 |
tcg_gen_sub_i64(CPU_V001);
|
|
3771 | 3898 |
} else { |
3772 |
gen_op_neon_addl_u64();
|
|
3899 |
tcg_gen_add_i64(CPU_V001);
|
|
3773 | 3900 |
} |
3774 | 3901 |
break; |
3775 | 3902 |
default: |
3776 | 3903 |
abort(); |
3777 | 3904 |
} |
3778 |
NEON_SET_REG(T0, rd, pass * 2); |
|
3779 |
NEON_SET_REG(T1, rd, pass * 2 + 1); |
|
3905 |
neon_store_reg64(cpu_V0, rd + pass); |
|
3780 | 3906 |
} |
3781 | 3907 |
return 0; |
3782 | 3908 |
} |
... | ... | |
3784 | 3910 |
case 8: /* VSHL */ |
3785 | 3911 |
case 9: /* VQSHL */ |
3786 | 3912 |
case 10: /* VRSHL */ |
3787 |
case 11: /* VQSHL */ |
|
3788 |
/* Shift operations have Rn and Rm reversed. */ |
|
3913 |
case 11: /* VQRSHL */ |
|
3789 | 3914 |
{ |
3790 |
int tmp; |
|
3791 |
tmp = rn; |
|
3915 |
int rtmp; |
|
3916 |
/* Shift instruction operands are reversed. */ |
|
3917 |
rtmp = rn; |
|
3792 | 3918 |
rn = rm; |
3793 |
rm = tmp; |
|
3919 |
rm = rtmp;
|
|
3794 | 3920 |
pairwise = 0; |
3795 | 3921 |
} |
3796 | 3922 |
break; |
... | ... | |
3834 | 3960 |
GEN_NEON_INTEGER_OP(hadd); |
3835 | 3961 |
break; |
3836 | 3962 |
case 1: /* VQADD */ |
3837 |
switch (size << 1| u) { |
|
3838 |
case 0: gen_op_neon_qadd_s8(); break; |
|
3839 |
case 1: gen_op_neon_qadd_u8(); break; |
|
3840 |
case 2: gen_op_neon_qadd_s16(); break; |
|
3841 |
case 3: gen_op_neon_qadd_u16(); break; |
|
3842 |
case 4: |
|
3843 |
gen_helper_add_saturate(cpu_T[0], cpu_T[0], cpu_T[1]); |
|
3844 |
break; |
|
3845 |
case 5: |
|
3846 |
gen_helper_add_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]); |
|
3847 |
break; |
|
3848 |
default: abort(); |
|
3849 |
} |
|
3963 |
GEN_NEON_INTEGER_OP_ENV(qadd); |
|
3850 | 3964 |
break; |
3851 | 3965 |
case 2: /* VRHADD */ |
3852 | 3966 |
GEN_NEON_INTEGER_OP(rhadd); |
... | ... | |
3890 | 4004 |
GEN_NEON_INTEGER_OP(hsub); |
3891 | 4005 |
break; |
3892 | 4006 |
case 5: /* VQSUB */ |
3893 |
switch ((size << 1) | u) { |
|
3894 |
case 0: gen_op_neon_qsub_s8(); break; |
|
3895 |
case 1: gen_op_neon_qsub_u8(); break; |
|
3896 |
case 2: gen_op_neon_qsub_s16(); break; |
|
3897 |
case 3: gen_op_neon_qsub_u16(); break; |
|
3898 |
case 4: |
|
3899 |
gen_helper_sub_saturate(cpu_T[0], cpu_T[0], cpu_T[1]); |
|
3900 |
break; |
|
3901 |
case 5: |
|
3902 |
gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]); |
|
3903 |
break; |
|
3904 |
default: abort(); |
|
3905 |
} |
|
4007 |
GEN_NEON_INTEGER_OP_ENV(qsub); |
|
3906 | 4008 |
break; |
3907 | 4009 |
case 6: /* VCGT */ |
3908 | 4010 |
GEN_NEON_INTEGER_OP(cgt); |
... | ... | |
3911 | 4013 |
GEN_NEON_INTEGER_OP(cge); |
3912 | 4014 |
break; |
3913 | 4015 |
case 8: /* VSHL */ |
3914 |
switch ((size << 1) | u) { |
|
3915 |
case 0: gen_op_neon_shl_s8(); break; |
|
3916 |
case 1: gen_op_neon_shl_u8(); break; |
|
3917 |
case 2: gen_op_neon_shl_s16(); break; |
|
3918 |
case 3: gen_op_neon_shl_u16(); break; |
|
3919 |
case 4: gen_op_neon_shl_s32(); break; |
|
3920 |
case 5: gen_op_neon_shl_u32(); break; |
|
3921 |
#if 0 |
|
3922 |
/* ??? Implementing these is tricky because the vector ops work |
|
3923 |
on 32-bit pieces. */ |
|
3924 |
case 6: gen_op_neon_shl_s64(); break; |
|
3925 |
case 7: gen_op_neon_shl_u64(); break; |
|
3926 |
#else |
|
3927 |
case 6: case 7: cpu_abort(env, "VSHL.64 not implemented"); |
|
3928 |
#endif |
|
3929 |
} |
|
4016 |
GEN_NEON_INTEGER_OP(shl); |
|
3930 | 4017 |
break; |
3931 | 4018 |
case 9: /* VQSHL */ |
3932 |
switch ((size << 1) | u) { |
|
3933 |
case 0: gen_op_neon_qshl_s8(); break; |
|
3934 |
case 1: gen_op_neon_qshl_u8(); break; |
|
3935 |
case 2: gen_op_neon_qshl_s16(); break; |
|
3936 |
case 3: gen_op_neon_qshl_u16(); break; |
|
3937 |
case 4: gen_op_neon_qshl_s32(); break; |
|
3938 |
case 5: gen_op_neon_qshl_u32(); break; |
|
3939 |
#if 0 |
|
3940 |
/* ??? Implementing these is tricky because the vector ops work |
|
3941 |
on 32-bit pieces. */ |
|
3942 |
case 6: gen_op_neon_qshl_s64(); break; |
|
3943 |
case 7: gen_op_neon_qshl_u64(); break; |
|
3944 |
#else |
|
3945 |
case 6: case 7: cpu_abort(env, "VQSHL.64 not implemented"); |
|
3946 |
#endif |
|
3947 |
} |
|
4019 |
GEN_NEON_INTEGER_OP_ENV(qshl); |
|
3948 | 4020 |
break; |
3949 | 4021 |
case 10: /* VRSHL */ |
3950 |
switch ((size << 1) | u) { |
|
3951 |
case 0: gen_op_neon_rshl_s8(); break; |
|
3952 |
case 1: gen_op_neon_rshl_u8(); break; |
|
3953 |
case 2: gen_op_neon_rshl_s16(); break; |
|
3954 |
case 3: gen_op_neon_rshl_u16(); break; |
|
3955 |
case 4: gen_op_neon_rshl_s32(); break; |
|
3956 |
case 5: gen_op_neon_rshl_u32(); break; |
|
3957 |
#if 0 |
|
3958 |
/* ??? Implementing these is tricky because the vector ops work |
|
3959 |
on 32-bit pieces. */ |
|
3960 |
case 6: gen_op_neon_rshl_s64(); break; |
|
3961 |
case 7: gen_op_neon_rshl_u64(); break; |
|
3962 |
#else |
|
3963 |
case 6: case 7: cpu_abort(env, "VRSHL.64 not implemented"); |
|
3964 |
#endif |
|
3965 |
} |
|
4022 |
GEN_NEON_INTEGER_OP(rshl); |
|
3966 | 4023 |
break; |
3967 | 4024 |
case 11: /* VQRSHL */ |
3968 |
switch ((size << 1) | u) { |
|
3969 |
case 0: gen_op_neon_qrshl_s8(); break; |
|
3970 |
case 1: gen_op_neon_qrshl_u8(); break; |
|
3971 |
case 2: gen_op_neon_qrshl_s16(); break; |
|
3972 |
case 3: gen_op_neon_qrshl_u16(); break; |
|
3973 |
case 4: gen_op_neon_qrshl_s32(); break; |
|
3974 |
case 5: gen_op_neon_qrshl_u32(); break; |
|
3975 |
#if 0 |
|
3976 |
/* ??? Implementing these is tricky because the vector ops work |
|
3977 |
on 32-bit pieces. */ |
|
3978 |
case 6: gen_op_neon_qrshl_s64(); break; |
|
3979 |
case 7: gen_op_neon_qrshl_u64(); break; |
|
3980 |
#else |
|
3981 |
case 6: case 7: cpu_abort(env, "VQRSHL.64 not implemented"); |
|
3982 |
#endif |
|
3983 |
} |
|
4025 |
GEN_NEON_INTEGER_OP_ENV(qrshl); |
|
3984 | 4026 |
break; |
3985 | 4027 |
case 12: /* VMAX */ |
3986 | 4028 |
GEN_NEON_INTEGER_OP(max); |
... | ... | |
4002 | 4044 |
return 1; |
4003 | 4045 |
} else { /* VSUB */ |
4004 | 4046 |
switch (size) { |
4005 |
case 0: gen_op_neon_sub_u8(); break;
|
|
4006 |
case 1: gen_op_neon_sub_u16(); break;
|
|
4047 |
case 0: gen_helper_neon_sub_u8(CPU_T001); break;
|
|
4048 |
case 1: gen_helper_neon_sub_u16(CPU_T001); break;
|
|
4007 | 4049 |
case 2: gen_op_subl_T0_T1(); break; |
4008 | 4050 |
default: return 1; |
4009 | 4051 |
} |
... | ... | |
4012 | 4054 |
case 17: |
4013 | 4055 |
if (!u) { /* VTST */ |
4014 | 4056 |
switch (size) { |
4015 |
case 0: gen_op_neon_tst_u8(); break;
|
|
4016 |
case 1: gen_op_neon_tst_u16(); break;
|
|
4017 |
case 2: gen_op_neon_tst_u32(); break;
|
|
4057 |
case 0: gen_helper_neon_tst_u8(CPU_T001); break;
|
|
4058 |
case 1: gen_helper_neon_tst_u16(CPU_T001); break;
|
|
4059 |
case 2: gen_helper_neon_tst_u32(CPU_T001); break;
|
|
4018 | 4060 |
default: return 1; |
4019 | 4061 |
} |
4020 | 4062 |
} else { /* VCEQ */ |
4021 | 4063 |
switch (size) { |
4022 |
case 0: gen_op_neon_ceq_u8(); break;
|
|
4023 |
case 1: gen_op_neon_ceq_u16(); break;
|
|
4024 |
case 2: gen_op_neon_ceq_u32(); break;
|
|
4064 |
case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
|
|
4065 |
case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
|
|
4066 |
case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
|
|
4025 | 4067 |
default: return 1; |
4026 | 4068 |
} |
4027 | 4069 |
} |
4028 | 4070 |
break; |
4029 | 4071 |
case 18: /* Multiply. */ |
4030 | 4072 |
switch (size) { |
4031 |
case 0: gen_op_neon_mul_u8(); break;
|
|
4032 |
case 1: gen_op_neon_mul_u16(); break;
|
|
4073 |
case 0: gen_helper_neon_mul_u8(CPU_T001); break;
|
|
4074 |
case 1: gen_helper_neon_mul_u16(CPU_T001); break;
|
|
4033 | 4075 |
case 2: gen_op_mul_T0_T1(); break; |
4034 | 4076 |
default: return 1; |
4035 | 4077 |
} |
4036 | 4078 |
NEON_GET_REG(T1, rd, pass); |
4037 | 4079 |
if (u) { /* VMLS */ |
4038 |
switch (size) { |
|
4039 |
case 0: gen_op_neon_rsb_u8(); break; |
|
4040 |
case 1: gen_op_neon_rsb_u16(); break; |
|
4041 |
case 2: gen_op_rsbl_T0_T1(); break; |
|
4042 |
default: return 1; |
|
4043 |
} |
|
4080 |
gen_neon_rsb(size); |
|
4044 | 4081 |
} else { /* VMLA */ |
4045 | 4082 |
gen_neon_add(size); |
4046 | 4083 |
} |
4047 | 4084 |
break; |
4048 | 4085 |
case 19: /* VMUL */ |
4049 | 4086 |
if (u) { /* polynomial */ |
4050 |
gen_op_neon_mul_p8();
|
|
4087 |
gen_helper_neon_mul_p8(CPU_T001);
|
|
4051 | 4088 |
} else { /* Integer */ |
4052 | 4089 |
switch (size) { |
4053 |
case 0: gen_op_neon_mul_u8(); break;
|
|
4054 |
case 1: gen_op_neon_mul_u16(); break;
|
|
4090 |
case 0: gen_helper_neon_mul_u8(CPU_T001); break;
|
|
4091 |
case 1: gen_helper_neon_mul_u16(CPU_T001); break;
|
|
4055 | 4092 |
case 2: gen_op_mul_T0_T1(); break; |
4056 | 4093 |
default: return 1; |
4057 | 4094 |
} |
... | ... | |
4066 | 4103 |
case 22: /* Multiply high. */ |
4067 | 4104 |
if (!u) { /* VQDMULH */ |
4068 | 4105 |
switch (size) { |
4069 |
case 1: gen_op_neon_qdmulh_s16(); break;
|
|
4070 |
case 2: gen_op_neon_qdmulh_s32(); break;
|
|
4106 |
case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
|
|
4107 |
case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
|
|
4071 | 4108 |
default: return 1; |
4072 | 4109 |
} |
4073 | 4110 |
} else { /* VQRDMULH */ |
4074 | 4111 |
switch (size) { |
4075 |
case 1: gen_op_neon_qrdmulh_s16(); break;
|
|
4076 |
case 2: gen_op_neon_qrdmulh_s32(); break;
|
|
4112 |
case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
|
|
4113 |
case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
|
|
4077 | 4114 |
default: return 1; |
4078 | 4115 |
} |
4079 | 4116 |
} |
... | ... | |
4082 | 4119 |
if (u) |
4083 | 4120 |
return 1; |
4084 | 4121 |
switch (size) { |
4085 |
case 0: gen_op_neon_padd_u8(); break;
|
|
4086 |
case 1: gen_op_neon_padd_u16(); break;
|
|
4122 |
case 0: gen_helper_neon_padd_u8(CPU_T001); break;
|
|
4123 |
case 1: gen_helper_neon_padd_u16(CPU_T001); break;
|
|
4087 | 4124 |
case 2: gen_op_addl_T0_T1(); break; |
4088 | 4125 |
default: return 1; |
4089 | 4126 |
} |
... | ... | |
4091 | 4128 |
case 26: /* Floating point arithmetic. */ |
4092 | 4129 |
switch ((u << 2) | size) { |
4093 | 4130 |
case 0: /* VADD */ |
4094 |
gen_op_neon_add_f32();
|
|
4131 |
gen_helper_neon_add_f32(CPU_T001);
|
|
4095 | 4132 |
break; |
4096 | 4133 |
case 2: /* VSUB */ |
4097 |
gen_op_neon_sub_f32();
|
|
4134 |
gen_helper_neon_sub_f32(CPU_T001);
|
|
4098 | 4135 |
break; |
4099 | 4136 |
case 4: /* VPADD */ |
4100 |
gen_op_neon_add_f32();
|
|
4137 |
gen_helper_neon_add_f32(CPU_T001);
|
|
4101 | 4138 |
break; |
4102 | 4139 |
case 6: /* VABD */ |
4103 |
gen_op_neon_abd_f32();
|
|
4140 |
gen_helper_neon_abd_f32(CPU_T001);
|
|
4104 | 4141 |
break; |
4105 | 4142 |
default: |
4106 | 4143 |
return 1; |
4107 | 4144 |
} |
4108 | 4145 |
break; |
4109 | 4146 |
case 27: /* Float multiply. */ |
4110 |
gen_op_neon_mul_f32();
|
|
4147 |
gen_helper_neon_mul_f32(CPU_T001);
|
|
4111 | 4148 |
if (!u) { |
4112 | 4149 |
NEON_GET_REG(T1, rd, pass); |
4113 | 4150 |
if (size == 0) { |
4114 |
gen_op_neon_add_f32();
|
|
4151 |
gen_helper_neon_add_f32(CPU_T001);
|
|
4115 | 4152 |
} else { |
4116 |
gen_op_neon_rsb_f32();
|
|
4153 |
gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
|
|
4117 | 4154 |
} |
4118 | 4155 |
} |
4119 | 4156 |
break; |
4120 | 4157 |
case 28: /* Float compare. */ |
4121 | 4158 |
if (!u) { |
4122 |
gen_op_neon_ceq_f32();
|
|
4159 |
gen_helper_neon_ceq_f32(CPU_T001);
|
|
4123 | 4160 |
} else { |
4124 | 4161 |
if (size == 0) |
4125 |
gen_op_neon_cge_f32();
|
|
4162 |
gen_helper_neon_cge_f32(CPU_T001);
|
|
4126 | 4163 |
else |
4127 |
gen_op_neon_cgt_f32();
|
|
4164 |
gen_helper_neon_cgt_f32(CPU_T001);
|
|
4128 | 4165 |
} |
4129 | 4166 |
break; |
4130 | 4167 |
case 29: /* Float compare absolute. */ |
4131 | 4168 |
if (!u) |
4132 | 4169 |
return 1; |
4133 | 4170 |
if (size == 0) |
4134 |
gen_op_neon_acge_f32();
|
|
4171 |
gen_helper_neon_acge_f32(CPU_T001);
|
|
4135 | 4172 |
else |
4136 |
gen_op_neon_acgt_f32();
|
|
4173 |
gen_helper_neon_acgt_f32(CPU_T001);
|
|
4137 | 4174 |
break; |
4138 | 4175 |
case 30: /* Float min/max. */ |
4139 | 4176 |
if (size == 0) |
4140 |
gen_op_neon_max_f32();
|
|
4177 |
gen_helper_neon_max_f32(CPU_T001);
|
|
4141 | 4178 |
else |
4142 |
gen_op_neon_min_f32();
|
|
4179 |
gen_helper_neon_min_f32(CPU_T001);
|
|
4143 | 4180 |
break; |
4144 | 4181 |
case 31: |
4145 | 4182 |
if (size == 0) |
... | ... | |
4166 | 4203 |
NEON_SET_REG(T0, rd, pass); |
4167 | 4204 |
} |
4168 | 4205 |
} |
4206 |
/* End of 3 register same size operations. */ |
|
4169 | 4207 |
} else if (insn & (1 << 4)) { |
4170 | 4208 |
if ((insn & 0x00380080) != 0) { |
4171 | 4209 |
/* Two registers and shift. */ |
... | ... | |
4212 | 4250 |
} |
4213 | 4251 |
|
4214 | 4252 |
for (pass = 0; pass < count; pass++) { |
4215 |
if (size < 3) { |
|
4216 |
/* Operands in T0 and T1. */ |
|
4217 |
gen_op_movl_T1_im(imm); |
|
4218 |
NEON_GET_REG(T0, rm, pass); |
|
4219 |
} else { |
|
4220 |
/* Operands in {T0, T1} and env->vfp.scratch. */ |
|
4221 |
gen_op_movl_T0_im(imm); |
|
4222 |
gen_neon_movl_scratch_T0(0); |
|
4223 |
gen_op_movl_T0_im((int32_t)imm >> 31); |
|
4224 |
gen_neon_movl_scratch_T0(1); |
|
4225 |
NEON_GET_REG(T0, rm, pass * 2); |
|
4226 |
NEON_GET_REG(T1, rm, pass * 2 + 1); |
|
4227 |
} |
|
4228 |
|
|
4229 |
if (gen_neon_shift_im[op][u][size] == NULL) |
|
4230 |
return 1; |
|
4231 |
gen_neon_shift_im[op][u][size](); |
|
4232 |
|
|
4233 |
if (op == 1 || op == 3) { |
|
4234 |
/* Accumulate. */ |
|
4235 |
if (size == 3) { |
|
4236 |
gen_neon_movl_scratch_T0(0); |
|
4237 |
gen_neon_movl_scratch_T1(1); |
|
4238 |
NEON_GET_REG(T0, rd, pass * 2); |
|
4239 |
NEON_GET_REG(T1, rd, pass * 2 + 1); |
|
4240 |
gen_op_neon_addl_u64(); |
|
4241 |
} else { |
|
4242 |
NEON_GET_REG(T1, rd, pass); |
|
4243 |
gen_neon_add(size); |
|
4244 |
} |
|
4245 |
} else if (op == 4 || (op == 5 && u)) { |
|
4246 |
/* Insert */ |
|
4247 |
if (size == 3) { |
|
4248 |
cpu_abort(env, "VS[LR]I.64 not implemented"); |
|
4249 |
} |
|
4250 |
switch (size) { |
|
4251 |
case 0: |
|
4252 |
if (op == 4) |
|
4253 |
imm = 0xff >> -shift; |
|
4253 |
if (size == 3) { |
|
4254 |
neon_load_reg64(cpu_V0, rm + pass); |
|
4255 |
tcg_gen_movi_i64(cpu_V1, imm); |
|
4256 |
switch (op) { |
|
4257 |
case 0: /* VSHR */ |
|
4258 |
case 1: /* VSRA */ |
|
4259 |
if (u) |
|
4260 |
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); |
|
4254 | 4261 |
else |
4255 |
imm = (uint8_t)(0xff << shift); |
|
4256 |
imm |= imm << 8; |
|
4257 |
imm |= imm << 16; |
|
4262 |
gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); |
|
4258 | 4263 |
break; |
4259 |
case 1: |
|
4260 |
if (op == 4) |
|
4261 |
imm = 0xffff >> -shift; |
|
4264 |
case 2: /* VRSHR */ |
|
4265 |
case 3: /* VRSRA */ |
|
4266 |
if (u) |
|
4267 |
gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1); |
|
4262 | 4268 |
else |
4263 |
imm = (uint16_t)(0xffff << shift); |
|
4264 |
imm |= imm << 16; |
|
4269 |
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); |
|
4265 | 4270 |
break; |
4266 |
case 2: |
|
4267 |
if (op == 4) |
|
4268 |
imm = 0xffffffffu >> -shift; |
|
4271 |
case 4: /* VSRI */ |
|
4272 |
if (!u) |
|
4273 |
return 1; |
|
4274 |
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); |
|
4275 |
break; |
|
4276 |
case 5: /* VSHL, VSLI */ |
|
4277 |
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); |
|
4278 |
break; |
|
4279 |
case 6: /* VQSHL */ |
|
4280 |
if (u) |
|
4281 |
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); |
|
4269 | 4282 |
else |
4270 |
imm = 0xffffffffu << shift; |
|
4283 |
gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); |
|
4284 |
break; |
|
4285 |
case 7: /* VQSHLU */ |
|
4286 |
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); |
|
4271 | 4287 |
break; |
4272 |
default: |
|
4273 |
abort(); |
|
4274 | 4288 |
} |
4275 |
tmp = neon_load_reg(rd, pass); |
|
4276 |
tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); |
|
4277 |
tcg_gen_andi_i32(tmp, tmp, ~imm); |
|
4278 |
tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); |
|
4279 |
} |
|
4280 |
if (size == 3) { |
|
4281 |
NEON_SET_REG(T0, rd, pass * 2); |
|
4282 |
NEON_SET_REG(T1, rd, pass * 2 + 1); |
|
4283 |
} else { |
|
4289 |
if (op == 1 || op == 3) { |
|
4290 |
/* Accumulate. */ |
|
4291 |
neon_load_reg64(cpu_V0, rd + pass); |
|
4292 |
tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); |
|
4293 |
} else if (op == 4 || (op == 5 && u)) { |
|
4294 |
/* Insert */ |
|
4295 |
cpu_abort(env, "VS[LR]I.64 not implemented"); |
|
4296 |
} |
|
4297 |
neon_store_reg64(cpu_V0, rd + pass); |
|
4298 |
} else { /* size < 3 */ |
|
4299 |
/* Operands in T0 and T1. */ |
|
4300 |
gen_op_movl_T1_im(imm); |
|
4301 |
NEON_GET_REG(T0, rm, pass); |
|
4302 |
switch (op) { |
|
4303 |
case 0: /* VSHR */ |
|
4304 |
case 1: /* VSRA */ |
|
4305 |
GEN_NEON_INTEGER_OP(shl); |
|
4306 |
break; |
|
4307 |
case 2: /* VRSHR */ |
|
4308 |
case 3: /* VRSRA */ |
|
4309 |
GEN_NEON_INTEGER_OP(rshl); |
|
4310 |
break; |
|
4311 |
case 4: /* VSRI */ |
|
4312 |
if (!u) |
|
4313 |
return 1; |
|
4314 |
GEN_NEON_INTEGER_OP(shl); |
|
4315 |
break; |
|
4316 |
case 5: /* VSHL, VSLI */ |
|
4317 |
switch (size) { |
|
4318 |
case 0: gen_helper_neon_shl_u8(CPU_T001); break; |
|
4319 |
case 1: gen_helper_neon_shl_u16(CPU_T001); break; |
|
4320 |
case 2: gen_helper_neon_shl_u32(CPU_T001); break; |
|
4321 |
default: return 1; |
|
4322 |
} |
|
4323 |
break; |
|
4324 |
case 6: /* VQSHL */ |
|
4325 |
GEN_NEON_INTEGER_OP_ENV(qshl); |
|
4326 |
break; |
|
4327 |
case 7: /* VQSHLU */ |
|
4328 |
switch (size) { |
|
4329 |
case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break; |
|
4330 |
case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break; |
|
4331 |
case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break; |
|
4332 |
default: return 1; |
|
4333 |
} |
|
4334 |
break; |
|
4335 |
} |
|
4336 |
|
|
4337 |
if (op == 1 || op == 3) { |
|
4338 |
/* Accumulate. */ |
|
4339 |
NEON_GET_REG(T1, rd, pass); |
|
4340 |
gen_neon_add(size); |
|
4341 |
} else if (op == 4 || (op == 5 && u)) { |
|
4342 |
/* Insert */ |
|
4343 |
switch (size) { |
|
4344 |
case 0: |
|
4345 |
if (op == 4) |
|
4346 |
imm = 0xff >> -shift; |
|
4347 |
else |
|
4348 |
imm = (uint8_t)(0xff << shift); |
|
4349 |
imm |= imm << 8; |
|
4350 |
imm |= imm << 16; |
|
4351 |
break; |
|
4352 |
case 1: |
|
4353 |
if (op == 4) |
|
4354 |
imm = 0xffff >> -shift; |
|
4355 |
else |
|
4356 |
imm = (uint16_t)(0xffff << shift); |
|
4357 |
imm |= imm << 16; |
|
4358 |
break; |
|
4359 |
case 2: |
|
4360 |
if (op == 4) |
|
4361 |
imm = 0xffffffffu >> -shift; |
|
4362 |
else |
|
4363 |
imm = 0xffffffffu << shift; |
|
4364 |
break; |
|
4365 |
default: |
|
4366 |
abort(); |
|
4367 |
} |
|
4368 |
tmp = neon_load_reg(rd, pass); |
|
4369 |
tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); |
|
4370 |
tcg_gen_andi_i32(tmp, tmp, ~imm); |
|
4371 |
tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); |
|
4372 |
} |
|
4284 | 4373 |
NEON_SET_REG(T0, rd, pass); |
4285 | 4374 |
} |
4286 | 4375 |
} /* for pass */ |
4287 | 4376 |
} else if (op < 10) { |
4288 |
/* Shift by immedaiate and narrow:
|
|
4377 |
/* Shift by immediate and narrow: |
|
4289 | 4378 |
VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ |
4290 | 4379 |
shift = shift - (1 << (size + 3)); |
4291 | 4380 |
size++; |
4292 |
if (size == 3) { |
|
4293 |
count = q + 1; |
|
4294 |
} else { |
|
4295 |
count = q ? 4: 2; |
|
4296 |
} |
|
4297 | 4381 |
switch (size) { |
4298 | 4382 |
case 1: |
4299 |
imm = (uint16_t) shift;
|
|
4383 |
imm = (uint16_t)shift; |
|
4300 | 4384 |
imm |= imm << 16; |
4385 |
tmp2 = tcg_const_i32(imm); |
|
4301 | 4386 |
break; |
4302 | 4387 |
case 2: |
4388 |
imm = (uint32_t)shift; |
|
4389 |
tmp2 = tcg_const_i32(imm); |
|
4303 | 4390 |
case 3: |
4304 |
imm = shift;
|
|
4391 |
tmp2 = tcg_const_i64(shift);
|
|
4305 | 4392 |
break; |
4306 | 4393 |
default: |
4307 | 4394 |
abort(); |
4308 | 4395 |
} |
4309 | 4396 |
|
4310 |
/* Processing MSB first means we need to do less shuffling at |
|
4311 |
the end. */ |
|
4312 |
for (pass = count - 1; pass >= 0; pass--) { |
|
4313 |
/* Avoid clobbering the second operand before it has been |
|
4314 |
written. */ |
|
4315 |
n = pass; |
|
4316 |
if (rd == rm) |
|
4317 |
n ^= (count - 1); |
|
4318 |
else |
|
4319 |
n = pass; |
|
4320 |
|
|
4321 |
if (size < 3) { |
|
4322 |
/* Operands in T0 and T1. */ |
|
4323 |
gen_op_movl_T1_im(imm); |
|
4324 |
NEON_GET_REG(T0, rm, n); |
|
4397 |
for (pass = 0; pass < 2; pass++) { |
|
4398 |
if (size == 3) { |
|
4399 |
neon_load_reg64(cpu_V0, rm + pass); |
|
4400 |
if (q) { |
|
4401 |
if (u) |
|
4402 |
gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp2); |
|
4403 |
else |
|
4404 |
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp2); |
|
4405 |
} else { |
|
4406 |
if (u) |
|
4407 |
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp2); |
|
4408 |
else |
|
4409 |
gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp2); |
|
4410 |
} |
|
4325 | 4411 |
} else { |
4326 |
/* Operands in {T0, T1} and env->vfp.scratch. */ |
|
4327 |
gen_op_movl_T0_im(imm); |
|
4328 |
gen_neon_movl_scratch_T0(0); |
|
4329 |
gen_op_movl_T0_im((int32_t)imm >> 31); |
|
4330 |
gen_neon_movl_scratch_T0(1); |
|
4331 |
NEON_GET_REG(T0, rm, n * 2); |
|
4332 |
NEON_GET_REG(T0, rm, n * 2 + 1); |
|
4412 |
tmp = neon_load_reg(rm + pass, 0); |
|
4413 |
gen_neon_shift_narrow(size, tmp, tmp2, q, u); |
|
4414 |
tcg_gen_extu_i32_i64(cpu_V0, tmp); |
|
4415 |
dead_tmp(tmp); |
|
4416 |
tmp = neon_load_reg(rm + pass, 1); |
|
4417 |
gen_neon_shift_narrow(size, tmp, tmp2, q, u); |
|
4418 |
tcg_gen_extu_i32_i64(cpu_V1, tmp); |
|
4419 |
dead_tmp(tmp); |
|
4420 |
tcg_gen_shli_i64(cpu_V1, cpu_V1, 32); |
|
4421 |
tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); |
|
4333 | 4422 |
} |
4334 |
|
|
4335 |
gen_neon_shift_im_narrow[q][u][size - 1](); |
|
4336 |
|
|
4337 |
if (size < 3 && (pass & 1) == 0) { |
|
4338 |
gen_neon_movl_scratch_T0(0); |
|
4423 |
tmp = new_tmp(); |
|
4424 |
if (op == 8 && !u) { |
|
4425 |
gen_neon_narrow(size - 1, tmp, cpu_V0); |
|
4339 | 4426 |
} else { |
4340 |
uint32_t offset; |
|
4341 |
|
|
4342 |
if (size < 3) |
|
4343 |
gen_neon_movl_T1_scratch(0); |
|
4344 |
|
|
4345 |
if (op == 8 && !u) { |
|
4346 |
gen_neon_narrow[size - 1](); |
|
4347 |
} else { |
|
4348 |
if (op == 8) |
|
4349 |
gen_neon_narrow_sats[size - 2](); |
|
4350 |
else |
|
4351 |
gen_neon_narrow_satu[size - 1](); |
|
4352 |
} |
|
4353 |
if (size == 3) |
|
4354 |
offset = neon_reg_offset(rd, n); |
|
4427 |
if (op == 8) |
|
4428 |
gen_neon_narrow_sats(size - 1, tmp, cpu_V0); |
|
4355 | 4429 |
else |
4356 |
offset = neon_reg_offset(rd, n >> 1); |
|
4357 |
gen_op_neon_setreg_T0(offset); |
|
4430 |
gen_neon_narrow_satu(size - 1, tmp, cpu_V0); |
|
4431 |
} |
|
4432 |
if (pass == 0) { |
|
4433 |
tmp2 = tmp; |
|
4434 |
} else { |
|
4435 |
neon_store_reg(rd, 0, tmp2); |
|
4436 |
neon_store_reg(rd, 1, tmp); |
|
4358 | 4437 |
} |
4359 | 4438 |
} /* for pass */ |
4360 | 4439 |
} else if (op == 10) { |
4361 | 4440 |
/* VSHLL */ |
4362 |
if (q) |
|
4441 |
if (q || size == 3)
|
|
4363 | 4442 |
return 1; |
4443 |
tmp = neon_load_reg(rm, 0); |
|
4444 |
tmp2 = neon_load_reg(rm, 1); |
|
4364 | 4445 |
for (pass = 0; pass < 2; pass++) { |
4365 |
/* Avoid clobbering the input operand. */ |
|
4366 |
if (rd == rm) |
|
4367 |
n = 1 - pass; |
|
4368 |
else |
|
4369 |
n = pass; |
|
4446 |
if (pass == 1) |
|
4447 |
tmp = tmp2; |
|
4448 |
|
|
4449 |
gen_neon_widen(cpu_V0, tmp, size, u); |
|
4370 | 4450 |
|
4371 |
NEON_GET_REG(T0, rm, n); |
|
4372 |
GEN_NEON_INTEGER_OP(widen); |
|
4373 | 4451 |
if (shift != 0) { |
4374 | 4452 |
/* The shift is less than the width of the source |
4375 |
type, so in some cases we can just |
|
4376 |
shift the whole register. */ |
|
4377 |
if (size == 1 || (size == 0 && u)) { |
|
4378 |
gen_op_shll_T0_im(shift); |
|
4379 |
gen_op_shll_T1_im(shift); |
|
4380 |
} else { |
|
4381 |
switch (size) { |
|
4382 |
case 0: gen_op_neon_shll_u16(shift); break; |
|
4383 |
case 2: gen_op_neon_shll_u64(shift); break; |
|
4384 |
default: abort(); |
|
4453 |
type, so we can just shift the whole register. */ |
|
4454 |
tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); |
|
4455 |
if (size < 2 || !u) { |
|
4456 |
uint64_t imm64; |
|
4457 |
if (size == 0) { |
|
4458 |
imm = (0xffu >> (8 - shift)); |
|
4459 |
imm |= imm << 16; |
|
4460 |
} else { |
|
4461 |
imm = 0xffff >> (16 - shift); |
|
4385 | 4462 |
} |
4463 |
imm64 = imm | (((uint64_t)imm) << 32); |
|
4464 |
tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); |
|
4386 | 4465 |
} |
4387 | 4466 |
} |
4388 |
NEON_SET_REG(T0, rd, n * 2); |
|
4389 |
NEON_SET_REG(T1, rd, n * 2 + 1); |
|
4467 |
neon_store_reg64(cpu_V0, rd + pass); |
|
4390 | 4468 |
} |
4391 | 4469 |
} else if (op == 15 || op == 16) { |
4392 | 4470 |
/* VCVT fixed-point. */ |
... | ... | |
4458 | 4536 |
|
4459 | 4537 |
for (pass = 0; pass < (q ? 4 : 2); pass++) { |
4460 | 4538 |
if (op & 1 && op < 12) { |
4461 |
NEON_GET_REG(T0, rd, pass);
|
|
4539 |
tmp = neon_load_reg(rd, pass);
|
|
4462 | 4540 |
if (invert) { |
4463 | 4541 |
/* The immediate value has already been inverted, so |
4464 | 4542 |
BIC becomes AND. */ |
4465 |
gen_op_andl_T0_T1(); |
|
4543 |
tcg_gen_andi_i32(tmp, tmp, imm); |
Also available in: Unified diff