Revision 222a3336 target-i386/translate.c

b/target-i386/translate.c
2140 2140
    }
2141 2141
}
2142 2142

  
2143
/* generate modrm memory load or store of 'reg'. TMP0 is used if reg !=
2143
/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2144 2144
   OR_TMP0 */
2145 2145
static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
2146 2146
{
......
2770 2770
    [0xc2] = SSE_FOP(cmpeq),
2771 2771
    [0xc6] = { helper_shufps, helper_shufpd },
2772 2772

  
2773
    [0x38] = { SSE_SPECIAL, SSE_SPECIAL },  /* SSSE3 */
2774
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL },  /* SSSE3 */
2773
    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
2774
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
2775 2775

  
2776 2776
    /* MMX ops and their SSE extensions */
2777 2777
    [0x60] = MMX_OP2(punpcklbw),
......
2924 2924
    [0xbf] = helper_pavgb_mmx /* pavgusb */
2925 2925
};
2926 2926

  
2927
static void *sse_op_table6[256][2] = {
2928
    [0x00] = MMX_OP2(pshufb),
2929
    [0x01] = MMX_OP2(phaddw),
2930
    [0x02] = MMX_OP2(phaddd),
2931
    [0x03] = MMX_OP2(phaddsw),
2932
    [0x04] = MMX_OP2(pmaddubsw),
2933
    [0x05] = MMX_OP2(phsubw),
2934
    [0x06] = MMX_OP2(phsubd),
2935
    [0x07] = MMX_OP2(phsubsw),
2936
    [0x08] = MMX_OP2(psignb),
2937
    [0x09] = MMX_OP2(psignw),
2938
    [0x0a] = MMX_OP2(psignd),
2939
    [0x0b] = MMX_OP2(pmulhrsw),
2940
    [0x1c] = MMX_OP2(pabsb),
2941
    [0x1d] = MMX_OP2(pabsw),
2942
    [0x1e] = MMX_OP2(pabsd),
2927
/* One entry of the two-level SSE opcode tables (sse_op_table6/7).
 * op[]     - helper pointers selected by gen_sse's b1 value: gen_sse uses
 *            MMX register operands when b1 is 0 and XMM register operands
 *            otherwise.  A NULL slot makes gen_sse reject the opcode as
 *            illegal; SSE_SPECIAL marks opcodes decoded inline.
 * ext_mask - CPUID extended-feature bit(s); gen_sse rejects the opcode
 *            unless s->cpuid_ext_features has a bit of this mask set.
 */
struct sse_op_helper_s {
2928
    void *op[2]; uint32_t ext_mask;
2929
};
2930
/* Initializers for struct sse_op_helper_s table entries.
 * SSSE3_OP(x):   helper pair produced by MMX_OP2(x) (defined elsewhere in
 *                this file), gated on CPUID_EXT_SSSE3.
 * SSE41_OP(x):   op[0] is NULL, op[1] is helper_<x>_xmm; gated on
 *                CPUID_EXT_SSE41.
 * SSE42_OP(x):   same layout as SSE41_OP, gated on CPUID_EXT_SSE42.
 * SSE41_SPECIAL: op[0] is NULL, op[1] is the SSE_SPECIAL marker (opcode is
 *                decoded inline by gen_sse); gated on CPUID_EXT_SSE41.
 */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2931
#define SSE41_OP(x) { { NULL, helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2932
#define SSE42_OP(x) { { NULL, helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2933
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2934
/* Helpers for the opcodes handled by gen_sse's case 0x038/0x138, indexed by
 * the opcode byte that follows 0x38.  gen_sse looks up
 * sse_op_table6[b].op[b1], rejects NULL slots and entries whose ext_mask is
 * not enabled in s->cpuid_ext_features, and calls the helper with pointers
 * to the two operands.  Entries not listed here are zero-initialized and
 * therefore illegal.  An SSE_SPECIAL entry that reaches the generic helper
 * call is also rejected; only the memory-operand form of 0x2a is handled
 * before that point.
 */
static struct sse_op_helper_s sse_op_table6[256] = {
2935
    [0x00] = SSSE3_OP(pshufb),
2936
    [0x01] = SSSE3_OP(phaddw),
2937
    [0x02] = SSSE3_OP(phaddd),
2938
    [0x03] = SSSE3_OP(phaddsw),
2939
    [0x04] = SSSE3_OP(pmaddubsw),
2940
    [0x05] = SSSE3_OP(phsubw),
2941
    [0x06] = SSSE3_OP(phsubd),
2942
    [0x07] = SSSE3_OP(phsubsw),
2943
    [0x08] = SSSE3_OP(psignb),
2944
    [0x09] = SSSE3_OP(psignw),
2945
    [0x0a] = SSSE3_OP(psignd),
2946
    [0x0b] = SSSE3_OP(pmulhrsw),
2947
    [0x10] = SSE41_OP(pblendvb),
2948
    [0x14] = SSE41_OP(blendvps),
2949
    [0x15] = SSE41_OP(blendvpd),
2950
    [0x17] = SSE41_OP(ptest),
2951
    [0x1c] = SSSE3_OP(pabsb),
2952
    [0x1d] = SSSE3_OP(pabsw),
2953
    [0x1e] = SSSE3_OP(pabsd),
2954
    [0x20] = SSE41_OP(pmovsxbw),
2955
    [0x21] = SSE41_OP(pmovsxbd),
2956
    [0x22] = SSE41_OP(pmovsxbq),
2957
    [0x23] = SSE41_OP(pmovsxwd),
2958
    [0x24] = SSE41_OP(pmovsxwq),
2959
    [0x25] = SSE41_OP(pmovsxdq),
2960
    [0x28] = SSE41_OP(pmuldq),
2961
    [0x29] = SSE41_OP(pcmpeqq),
2962
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2963
    [0x2b] = SSE41_OP(packusdw),
2964
    [0x30] = SSE41_OP(pmovzxbw),
2965
    [0x31] = SSE41_OP(pmovzxbd),
2966
    [0x32] = SSE41_OP(pmovzxbq),
2967
    [0x33] = SSE41_OP(pmovzxwd),
2968
    [0x34] = SSE41_OP(pmovzxwq),
2969
    [0x35] = SSE41_OP(pmovzxdq),
2970
    [0x37] = SSE42_OP(pcmpgtq),
2971
    [0x38] = SSE41_OP(pminsb),
2972
    [0x39] = SSE41_OP(pminsd),
2973
    [0x3a] = SSE41_OP(pminuw),
2974
    [0x3b] = SSE41_OP(pminud),
2975
    [0x3c] = SSE41_OP(pmaxsb),
2976
    [0x3d] = SSE41_OP(pmaxsd),
2977
    [0x3e] = SSE41_OP(pmaxuw),
2978
    [0x3f] = SSE41_OP(pmaxud),
2979
    [0x40] = SSE41_OP(pmulld),
2980
    [0x41] = SSE41_OP(phminposuw),
2943 2981
};
2944 2982

  
2945
static void *sse_op_table7[256][2] = {
2946
    [0x0f] = MMX_OP2(palignr),
2983
/* Helpers for the opcodes handled by gen_sse's case 0x03a/0x13a, indexed by
 * the opcode byte that follows 0x3a.  gen_sse looks up
 * sse_op_table7[b].op[b1] and rejects NULL slots and entries whose ext_mask
 * is not enabled in s->cpuid_ext_features.  SSE41_SPECIAL entries
 * (extract/insert forms) are decoded inline by gen_sse; the remaining
 * entries are called with the two operand pointers plus the immediate byte
 * read after the operands.  Entries not listed are zero-initialized and
 * therefore illegal.
 */
static struct sse_op_helper_s sse_op_table7[256] = {
2984
    [0x08] = SSE41_OP(roundps),
2985
    [0x09] = SSE41_OP(roundpd),
2986
    [0x0a] = SSE41_OP(roundss),
2987
    [0x0b] = SSE41_OP(roundsd),
2988
    [0x0c] = SSE41_OP(blendps),
2989
    [0x0d] = SSE41_OP(blendpd),
2990
    [0x0e] = SSE41_OP(pblendw),
2991
    [0x0f] = SSSE3_OP(palignr),
2992
    [0x14] = SSE41_SPECIAL, /* pextrb */
2993
    [0x15] = SSE41_SPECIAL, /* pextrw */
2994
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2995
    [0x17] = SSE41_SPECIAL, /* extractps */
2996
    [0x20] = SSE41_SPECIAL, /* pinsrb */
2997
    [0x21] = SSE41_SPECIAL, /* insertps */
2998
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2999
    [0x40] = SSE41_OP(dpps),
3000
    [0x41] = SSE41_OP(dppd),
3001
    [0x42] = SSE41_OP(mpsadbw),
3002
    [0x60] = SSE42_OP(pcmpestrm),
3003
    [0x61] = SSE42_OP(pcmpestri),
3004
    [0x62] = SSE42_OP(pcmpistrm),
3005
    [0x63] = SSE42_OP(pcmpistri),
2947 3006
};
2948 3007

  
2949 3008
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
......
3511 3570
            break;
3512 3571
        case 0x038:
3513 3572
        case 0x138:
3514
            if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3))
3515
                goto illegal_op;
3516

  
3517 3573
            b = modrm;
3518 3574
            modrm = ldub_code(s->pc++);
3519 3575
            rm = modrm & 7;
3520 3576
            reg = ((modrm >> 3) & 7) | rex_r;
3521 3577
            mod = (modrm >> 6) & 3;
3522 3578

  
3523
            sse_op2 = sse_op_table6[b][b1];
3579
            if (s->prefix & PREFIX_REPNZ)
3580
                goto crc32;
3581

  
3582
            sse_op2 = sse_op_table6[b].op[b1];
3524 3583
            if (!sse_op2)
3525 3584
                goto illegal_op;
3585
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3586
                goto illegal_op;
3526 3587

  
3527 3588
            if (b1) {
3528 3589
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
......
3531 3592
                } else {
3532 3593
                    op2_offset = offsetof(CPUX86State,xmm_t0);
3533 3594
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3534
                    gen_ldo_env_A0(s->mem_index, op2_offset);
3595
                    switch (b) {
3596
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3597
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3598
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3599
                        gen_ldq_env_A0(s->mem_index, op2_offset +
3600
                                        offsetof(XMMReg, XMM_Q(0)));
3601
                        break;
3602
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3603
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3604
                        tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
3605
                                          (s->mem_index >> 2) - 1);
3606
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3607
                                        offsetof(XMMReg, XMM_L(0)));
3608
                        break;
3609
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3610
                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
3611
                                          (s->mem_index >> 2) - 1);
3612
                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3613
                                        offsetof(XMMReg, XMM_W(0)));
3614
                        break;
3615
                    case 0x2a:            /* movntdqa */
3616
                        gen_ldo_env_A0(s->mem_index, op1_offset);
3617
                        return;
3618
                    default:
3619
                        gen_ldo_env_A0(s->mem_index, op2_offset);
3620
                    }
3535 3621
                }
3536 3622
            } else {
3537 3623
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
......
3543 3629
                    gen_ldq_env_A0(s->mem_index, op2_offset);
3544 3630
                }
3545 3631
            }
3632
            if (sse_op2 == SSE_SPECIAL)
3633
                goto illegal_op;
3634

  
3546 3635
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3547 3636
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3548 3637
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3638

  
3639
            if (b == 0x17)
3640
                s->cc_op = CC_OP_EFLAGS;
3549 3641
            break;
3550
        case 0x03a:
3551
        case 0x13a:
3552
            if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3))
3642
        case 0x338: /* crc32 */
3643
        crc32:
3644
            b = modrm;
3645
            modrm = ldub_code(s->pc++);
3646
            reg = ((modrm >> 3) & 7) | rex_r;
3647

  
3648
            if (b != 0xf0 && b != 0xf1)
3649
                goto illegal_op;
3650
            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
3553 3651
                goto illegal_op;
3554 3652

  
3653
            if (b == 0xf0)
3654
                ot = OT_BYTE;
3655
            else if (b == 0xf1 && s->dflag != 2)
3656
                if (s->prefix & PREFIX_DATA)
3657
                    ot = OT_WORD;
3658
                else
3659
                    ot = OT_LONG;
3660
            else
3661
                ot = OT_QUAD;
3662

  
3663
            gen_op_mov_TN_reg(OT_LONG, 0, reg);
3664
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3665
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3666
            tcg_gen_helper_1_3(helper_crc32, cpu_T[0], cpu_tmp2_i32,
3667
                            cpu_T[0], tcg_const_i32(8 << ot));
3668

  
3669
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3670
            gen_op_mov_reg_T0(ot, reg);
3671
            break;
3672
        case 0x03a:
3673
        case 0x13a:
3555 3674
            b = modrm;
3556 3675
            modrm = ldub_code(s->pc++);
3557 3676
            rm = modrm & 7;
3558 3677
            reg = ((modrm >> 3) & 7) | rex_r;
3559 3678
            mod = (modrm >> 6) & 3;
3560 3679

  
3561
            sse_op2 = sse_op_table7[b][b1];
3680
            sse_op2 = sse_op_table7[b].op[b1];
3562 3681
            if (!sse_op2)
3563 3682
                goto illegal_op;
3683
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
3684
                goto illegal_op;
3685

  
3686
            if (sse_op2 == SSE_SPECIAL) {
3687
                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3688
                rm = (modrm & 7) | REX_B(s);
3689
                if (mod != 3)
3690
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3691
                reg = ((modrm >> 3) & 7) | rex_r;
3692
                val = ldub_code(s->pc++);
3693
                switch (b) {
3694
                case 0x14: /* pextrb */
3695
                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3696
                                            xmm_regs[reg].XMM_B(val & 15)));
3697
                    if (mod == 3)
3698
                        gen_op_mov_reg_T0(ot, rm);
3699
                    else
3700
                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
3701
                                        (s->mem_index >> 2) - 1);
3702
                    break;
3703
                case 0x15: /* pextrw */
3704
                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3705
                                            xmm_regs[reg].XMM_W(val & 7)));
3706
                    if (mod == 3)
3707
                        gen_op_mov_reg_T0(ot, rm);
3708
                    else
3709
                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
3710
                                        (s->mem_index >> 2) - 1);
3711
                    break;
3712
                case 0x16:
3713
                    if (ot == OT_LONG) { /* pextrd */
3714
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
3715
                                        offsetof(CPUX86State,
3716
                                                xmm_regs[reg].XMM_L(val & 3)));
3717
                        if (mod == 3)
3718
                            gen_op_mov_reg_v(ot, rm, cpu_tmp2_i32);
3719
                        else
3720
                            tcg_gen_qemu_st32(cpu_tmp2_i32, cpu_A0,
3721
                                            (s->mem_index >> 2) - 1);
3722
                    } else { /* pextrq */
3723
                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3724
                                        offsetof(CPUX86State,
3725
                                                xmm_regs[reg].XMM_Q(val & 1)));
3726
                        if (mod == 3)
3727
                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
3728
                        else
3729
                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
3730
                                            (s->mem_index >> 2) - 1);
3731
                    }
3732
                    break;
3733
                case 0x17: /* extractps */
3734
                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3735
                                            xmm_regs[reg].XMM_L(val & 3)));
3736
                    if (mod == 3)
3737
                        gen_op_mov_reg_T0(ot, rm);
3738
                    else
3739
                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
3740
                                        (s->mem_index >> 2) - 1);
3741
                    break;
3742
                case 0x20: /* pinsrb */
3743
                    if (mod == 3)
3744
                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
3745
                    else
3746
                        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0,
3747
                                        (s->mem_index >> 2) - 1);
3748
                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3749
                                            xmm_regs[reg].XMM_B(val & 15)));
3750
                    break;
3751
                case 0x21: /* insertps */
3752
                    if (mod == 3)
3753
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
3754
                                        offsetof(CPUX86State,xmm_regs[rm]
3755
                                                .XMM_L((val >> 6) & 3)));
3756
                    else
3757
                        tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
3758
                                        (s->mem_index >> 2) - 1);
3759
                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
3760
                                    offsetof(CPUX86State,xmm_regs[reg]
3761
                                            .XMM_L((val >> 4) & 3)));
3762
                    if ((val >> 0) & 1)
3763
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3764
                                        cpu_env, offsetof(CPUX86State,
3765
                                                xmm_regs[reg].XMM_L(0)));
3766
                    if ((val >> 1) & 1)
3767
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3768
                                        cpu_env, offsetof(CPUX86State,
3769
                                                xmm_regs[reg].XMM_L(1)));
3770
                    if ((val >> 2) & 1)
3771
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3772
                                        cpu_env, offsetof(CPUX86State,
3773
                                                xmm_regs[reg].XMM_L(2)));
3774
                    if ((val >> 3) & 1)
3775
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3776
                                        cpu_env, offsetof(CPUX86State,
3777
                                                xmm_regs[reg].XMM_L(3)));
3778
                    break;
3779
                case 0x22:
3780
                    if (ot == OT_LONG) { /* pinsrd */
3781
                        if (mod == 3)
3782
                            gen_op_mov_v_reg(ot, cpu_tmp2_i32, rm);
3783
                        else
3784
                            tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
3785
                                            (s->mem_index >> 2) - 1);
3786
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
3787
                                        offsetof(CPUX86State,
3788
                                                xmm_regs[reg].XMM_L(val & 3)));
3789
                    } else { /* pinsrq */
3790
                        if (mod == 3)
3791
                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
3792
                        else
3793
                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
3794
                                            (s->mem_index >> 2) - 1);
3795
                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3796
                                        offsetof(CPUX86State,
3797
                                                xmm_regs[reg].XMM_Q(val & 1)));
3798
                    }
3799
                    break;
3800
                }
3801
                return;
3802
            }
3564 3803

  
3565 3804
            if (b1) {
3566 3805
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
......
3583 3822
            }
3584 3823
            val = ldub_code(s->pc++);
3585 3824

  
3825
            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
3826
                s->cc_op = CC_OP_EFLAGS;
3827

  
3828
                if (s->dflag == 2)
3829
                    /* The helper must use entire 64-bit gp registers */
3830
                    val |= 1 << 8;
3831
            }
3832

  
3586 3833
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3587 3834
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3588 3835
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
......
7094 7341
            gen_eob(s);
7095 7342
        }
7096 7343
        break;
7097
    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3 support */
7344
    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7098 7345
    case 0x1c3: /* MOVNTI reg, mem */
7099 7346
        if (!(s->cpuid_features & CPUID_SSE2))
7100 7347
            goto illegal_op;
......
7202 7449
        tcg_gen_helper_0_0(helper_rsm);
7203 7450
        gen_eob(s);
7204 7451
        break;
7452
    case 0x1b8: /* SSE4.2 popcnt */
7453
        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
7454
             PREFIX_REPZ)
7455
            goto illegal_op;
7456
        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
7457
            goto illegal_op;
7458

  
7459
        modrm = ldub_code(s->pc++);
7460
        reg = ((modrm >> 3) & 7);
7461

  
7462
        if (s->prefix & PREFIX_DATA)
7463
            ot = OT_WORD;
7464
        else if (s->dflag != 2)
7465
            ot = OT_LONG;
7466
        else
7467
            ot = OT_QUAD;
7468

  
7469
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
7470
        tcg_gen_helper_1_2(helper_popcnt,
7471
                cpu_T[0], cpu_T[0], tcg_const_i32(ot));
7472
        gen_op_mov_reg_T0(ot, reg);
7473
        break;
7205 7474
    case 0x10e ... 0x10f:
7206 7475
        /* 3DNow! instructions, ignore prefixes */
7207 7476
        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);

Also available in: Unified diff