Revision 19457615

b/target-arm/helpers.h
338 338
DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
339 339
DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
340 340

  
341
DEF_HELPER_0(neon_trn_u8, void)
342
DEF_HELPER_0(neon_trn_u16, void)
343
DEF_HELPER_0(neon_unzip_u8, void)
344
DEF_HELPER_0(neon_zip_u8, void)
345
DEF_HELPER_0(neon_zip_u16, void)
346

  
347 341
DEF_HELPER_2(neon_min_f32, i32, i32, i32)
348 342
DEF_HELPER_2(neon_max_f32, i32, i32, i32)
349 343
DEF_HELPER_2(neon_abd_f32, i32, i32, i32)
b/target-arm/op_helper.c
495 495
    }
496 496
    return res;
497 497
}
498

  
499
/* These need to return a pair of values, so still use T0/T1.  */
500
/* Transpose.  Argument order is rather strange to avoid special casing
501
   the translation code.
502
   On input T0 = rm, T1 = rd.  On output T0 = rd, T1 = rm  */
503
void HELPER(neon_trn_u8)(void)
504
{
505
    uint32_t rd;
506
    uint32_t rm;
507
    rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
508
    rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
509
    T0 = rd;
510
    T1 = rm;
511
}
512

  
513
void HELPER(neon_trn_u16)(void)
514
{
515
    uint32_t rd;
516
    uint32_t rm;
517
    rd = (T0 << 16) | (T1 & 0xffff);
518
    rm = (T1 >> 16) | (T0 & 0xffff0000);
519
    T0 = rd;
520
    T1 = rm;
521
}
522

  
523
/* Worker routines for zip and unzip.  */
524
void HELPER(neon_unzip_u8)(void)
525
{
526
    uint32_t rd;
527
    uint32_t rm;
528
    rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
529
         | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
530
    rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
531
         | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
532
    T0 = rd;
533
    T1 = rm;
534
}
535

  
536
void HELPER(neon_zip_u8)(void)
537
{
538
    uint32_t rd;
539
    uint32_t rm;
540
    rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
541
         | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
542
    rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
543
         | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
544
    T0 = rd;
545
    T1 = rm;
546
}
547

  
548
void HELPER(neon_zip_u16)(void)
549
{
550
    uint32_t tmp;
551

  
552
    tmp = (T0 & 0xffff) | (T1 << 16);
553
    T1 = (T1 & 0xffff0000) | (T0 >> 16);
554
    T0 = tmp;
555
}
b/target-arm/translate.c
3627 3627
    }
3628 3628
}
3629 3629

  
3630
static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
3631
{
3632
    TCGv rd, rm, tmp;
3633

  
3634
    rd = new_tmp();
3635
    rm = new_tmp();
3636
    tmp = new_tmp();
3637

  
3638
    tcg_gen_andi_i32(rd, t0, 0xff);
3639
    tcg_gen_shri_i32(tmp, t0, 8);
3640
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3641
    tcg_gen_or_i32(rd, rd, tmp);
3642
    tcg_gen_shli_i32(tmp, t1, 16);
3643
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3644
    tcg_gen_or_i32(rd, rd, tmp);
3645
    tcg_gen_shli_i32(tmp, t1, 8);
3646
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
3647
    tcg_gen_or_i32(rd, rd, tmp);
3648

  
3649
    tcg_gen_shri_i32(rm, t0, 8);
3650
    tcg_gen_andi_i32(rm, rm, 0xff);
3651
    tcg_gen_shri_i32(tmp, t0, 16);
3652
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3653
    tcg_gen_or_i32(rm, rm, tmp);
3654
    tcg_gen_shli_i32(tmp, t1, 8);
3655
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3656
    tcg_gen_or_i32(rm, rm, tmp);
3657
    tcg_gen_andi_i32(tmp, t1, 0xff000000);
3658
    tcg_gen_or_i32(t1, rm, tmp);
3659
    tcg_gen_mov_i32(t0, rd);
3660

  
3661
    dead_tmp(tmp);
3662
    dead_tmp(rm);
3663
    dead_tmp(rd);
3664
}
3665

  
3666
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
3667
{
3668
    TCGv rd, rm, tmp;
3669

  
3670
    rd = new_tmp();
3671
    rm = new_tmp();
3672
    tmp = new_tmp();
3673

  
3674
    tcg_gen_andi_i32(rd, t0, 0xff);
3675
    tcg_gen_shli_i32(tmp, t1, 8);
3676
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3677
    tcg_gen_or_i32(rd, rd, tmp);
3678
    tcg_gen_shli_i32(tmp, t0, 16);
3679
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3680
    tcg_gen_or_i32(rd, rd, tmp);
3681
    tcg_gen_shli_i32(tmp, t1, 24);
3682
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
3683
    tcg_gen_or_i32(rd, rd, tmp);
3684

  
3685
    tcg_gen_andi_i32(rm, t1, 0xff000000);
3686
    tcg_gen_shri_i32(tmp, t0, 8);
3687
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
3688
    tcg_gen_or_i32(rm, rm, tmp);
3689
    tcg_gen_shri_i32(tmp, t1, 8);
3690
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
3691
    tcg_gen_or_i32(rm, rm, tmp);
3692
    tcg_gen_shri_i32(tmp, t0, 16);
3693
    tcg_gen_andi_i32(tmp, tmp, 0xff);
3694
    tcg_gen_or_i32(t1, rm, tmp);
3695
    tcg_gen_mov_i32(t0, rd);
3696

  
3697
    dead_tmp(tmp);
3698
    dead_tmp(rm);
3699
    dead_tmp(rd);
3700
}
3701

  
3702
static void gen_neon_zip_u16(TCGv t0, TCGv t1)
3703
{
3704
    TCGv tmp, tmp2;
3705

  
3706
    tmp = new_tmp();
3707
    tmp2 = new_tmp();
3708

  
3709
    tcg_gen_andi_i32(tmp, t0, 0xffff);
3710
    tcg_gen_shli_i32(tmp2, t1, 16);
3711
    tcg_gen_or_i32(tmp, tmp, tmp2);
3712
    tcg_gen_andi_i32(t1, t1, 0xffff0000);
3713
    tcg_gen_shri_i32(tmp2, t0, 16);
3714
    tcg_gen_or_i32(t1, t1, tmp2);
3715
    tcg_gen_mov_i32(t0, tmp);
3716

  
3717
    dead_tmp(tmp2);
3718
    dead_tmp(tmp);
3719
}
3720

  
3630 3721
static void gen_neon_unzip(int reg, int q, int tmp, int size)
3631 3722
{
3632 3723
    int n;
3633 3724

  
3634 3725
    for (n = 0; n < q + 1; n += 2) {
3635 3726
        NEON_GET_REG(T0, reg, n);
3636
        NEON_GET_REG(T0, reg, n + n);
3727
        NEON_GET_REG(T1, reg, n + 1);
3637 3728
        switch (size) {
3638
        case 0: gen_helper_neon_unzip_u8(); break;
3639
        case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same.  */
3729
        case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;
3730
        case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same.  */
3640 3731
        case 2: /* no-op */; break;
3641 3732
        default: abort();
3642 3733
        }
3643
        gen_neon_movl_scratch_T0(tmp + n);
3644
        gen_neon_movl_scratch_T1(tmp + n + 1);
3734
        gen_neon_movl_T0_scratch(tmp + n);
3735
        gen_neon_movl_T1_scratch(tmp + n + 1);
3645 3736
    }
3646 3737
}
3647 3738

  
3739
static void gen_neon_trn_u8(TCGv t0, TCGv t1)
3740
{
3741
    TCGv rd, tmp;
3742

  
3743
    rd = new_tmp();
3744
    tmp = new_tmp();
3745

  
3746
    tcg_gen_shli_i32(rd, t0, 8);
3747
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3748
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3749
    tcg_gen_or_i32(rd, rd, tmp);
3750

  
3751
    tcg_gen_shri_i32(t1, t1, 8);
3752
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3753
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3754
    tcg_gen_or_i32(t1, t1, tmp);
3755
    tcg_gen_mov_i32(t0, rd);
3756

  
3757
    dead_tmp(tmp);
3758
    dead_tmp(rd);
3759
}
3760

  
3761
static void gen_neon_trn_u16(TCGv t0, TCGv t1)
3762
{
3763
    TCGv rd, tmp;
3764

  
3765
    rd = new_tmp();
3766
    tmp = new_tmp();
3767

  
3768
    tcg_gen_shli_i32(rd, t0, 16);
3769
    tcg_gen_andi_i32(tmp, t1, 0xffff);
3770
    tcg_gen_or_i32(rd, rd, tmp);
3771
    tcg_gen_shri_i32(t1, t1, 16);
3772
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3773
    tcg_gen_or_i32(t1, t1, tmp);
3774
    tcg_gen_mov_i32(t0, rd);
3775

  
3776
    dead_tmp(tmp);
3777
    dead_tmp(rd);
3778
}
3779

  
3780

  
3648 3781
static struct {
3649 3782
    int nregs;
3650 3783
    int interleave;
......
5256 5389
                        NEON_GET_REG(T0, rd, n);
5257 5390
                        NEON_GET_REG(T1, rd, n);
5258 5391
                        switch (size) {
5259
                        case 0: gen_helper_neon_zip_u8(); break;
5260
                        case 1: gen_helper_neon_zip_u16(); break;
5392
                        case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;
5393
                        case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break;
5261 5394
                        case 2: /* no-op */; break;
5262 5395
                        default: abort();
5263 5396
                        }
......
5442 5575
                        case 33: /* VTRN */
5443 5576
                            NEON_GET_REG(T1, rd, pass);
5444 5577
                            switch (size) {
5445
                            case 0: gen_helper_neon_trn_u8(); break;
5446
                            case 1: gen_helper_neon_trn_u16(); break;
5578
                            case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;
5579
                            case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break;
5447 5580
                            case 2: abort();
5448 5581
                            default: return 1;
5449 5582
                            }

Also available in: Unified diff