Revision 0211e5af target-i386/translate.c
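This revision replaces the hard-coded gen_op_mul*/gen_op_imul* micro-operations in the one-operand MUL/IMUL translation (and the two-operand IMUL path) with inline TCG code, falling back to helpers for the 64-bit forms. Each sequence computes the widened product directly and leaves the lazy condition-code inputs in cpu_cc_dst and cpu_cc_src.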

--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3799,21 +3799,64 @@
         case 4: /* mul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_mulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_mulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_mull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_mulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
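For the unsigned MUL cases above, the generated ops zero-extend both operands, multiply, write the low half of the product back (plus the high half to EDX for the word and long forms), and stash the high part in cpu_cc_src for lazy flag evaluation under CC_OP_MUL*. A minimal C sketch of the 8-bit case, assuming (as the CC_OP_MULB convention suggests) that CF/OF are later derived from cc_src being nonzero; the function name and parameters are illustrative, not part of the source:

    #include <stdint.h>

    static uint16_t mulb_sketch(uint8_t al, uint8_t operand, int *cf_of)
    {
        /* tcg_gen_ext8u_tl + tcg_gen_mul_tl: zero-extend, then multiply */
        uint16_t prod = (uint16_t)al * (uint16_t)operand;
        /* tcg_gen_andi_tl(cpu_cc_src, ..., 0xff00): CF/OF are set iff the
           high byte is nonzero, i.e. the product does not fit in AL */
        *cf_of = (prod & 0xff00) != 0;
        return prod; /* gen_op_mov_reg_T0(OT_WORD, R_EAX): full product to AX */
    }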
@@ -3822,21 +3865,68 @@
         case 5: /* imul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_imulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_imulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_imull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_imulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
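The signed IMUL cases follow the same shape but sign-extend the operands and compute cc_src as product minus sign_extend(low half of product), which is nonzero exactly when the result does not fit in the destination width, i.e. when CF/OF must be set. A sketch of the 16-bit case in plain C, with illustrative names only:

    #include <stdint.h>

    static int32_t imulw_sketch(int16_t ax, int16_t operand, int *cf_of)
    {
        /* tcg_gen_ext16s_tl + tcg_gen_mul_tl: sign-extend, then multiply */
        int32_t prod = (int32_t)ax * (int32_t)operand;
        /* tcg_gen_ext16s_tl(cpu_tmp0, ...) + tcg_gen_sub_tl(cpu_cc_src, ...):
           zero iff prod already equals the sign-extension of its low 16 bits */
        *cf_of = (prod - (int32_t)(int16_t)prod) != 0;
        return prod; /* low half goes to AX, high half (prod >> 16) to DX */
    }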
@@ -4104,13 +4194,41 @@
 
 #ifdef TARGET_X86_64
         if (ot == OT_QUAD) {
-            gen_op_imulq_T0_T1();
+            tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
         } else
 #endif
         if (ot == OT_LONG) {
-            gen_op_imull_T0_T1();
+#ifdef TARGET_X86_64
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+                }
+#endif
         } else {
-            gen_op_imulw_T0_T1();
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+            /* XXX: use 32 bit mul which could be faster */
+            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
         }
         gen_op_mov_reg_T0(ot, reg);
         s->cc_op = CC_OP_MULB + ot;
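On 32-bit hosts the target long is only 32 bits wide, so the #else branches above route the 32x32->64 widening multiply through 64-bit TCG temporaries and split the product afterwards. A sketch of that data flow, with illustrative names:

    #include <stdint.h>

    static void mull_host32_sketch(uint32_t a, uint32_t b,
                                   uint32_t *lo, uint32_t *hi)
    {
        /* tcg_gen_extu_i32_i64 + tcg_gen_mul_i64: widen, then multiply */
        uint64_t prod = (uint64_t)a * (uint64_t)b;
        *lo = (uint32_t)prod;         /* tcg_gen_trunc_i64_i32 -> EAX */
        *hi = (uint32_t)(prod >> 32); /* tcg_gen_shri_i64 + trunc -> EDX */
    }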
