Revision 5af45186 target-i386/translate.c

b/target-i386/translate.c
60 60
/* global register indexes */
61 61
static TCGv cpu_env, cpu_T[2], cpu_A0;
62 62
/* local register indexes (only used inside old micro ops) */
63
static TCGv cpu_tmp0, cpu_tmp1;
63
static TCGv cpu_tmp0, cpu_tmp1, cpu_tmp2, cpu_ptr0, cpu_ptr1;
64 64

  
65 65
#ifdef TARGET_X86_64
66 66
static int x86_64_hregs;
......
2410 2410
    tcg_gen_qemu_st64(cpu_tmp1, cpu_tmp0, mem_index);
2411 2411
}
2412 2412

  
2413
#define SSE_SPECIAL ((GenOpFunc2 *)1)
2414
#define SSE_DUMMY ((GenOpFunc2 *)2)
2413
/* Copy 16 bytes between two locations addressed relative to cpu_env:
   the 64-bit value at s_offset goes to d_offset, then the 64-bit value at
   s_offset + 8 goes to d_offset + 8.  cpu_tmp1 is used as the scratch
   temporary for both halves.
   d_offset: destination byte offset from cpu_env.
   s_offset: source byte offset from cpu_env. */
static inline void gen_op_movo(int d_offset, int s_offset)
2414
{
2415
    tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset);
2416
    tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
2417
    tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset + 8); /* second 8 bytes */
2418
    tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset + 8);
2419
}
2420

  
2421
/* Copy one 64-bit value between two locations addressed relative to
   cpu_env: load from s_offset into cpu_tmp1, then store it at d_offset.
   d_offset: destination byte offset from cpu_env.
   s_offset: source byte offset from cpu_env. */
static inline void gen_op_movq(int d_offset, int s_offset)
2422
{
2423
    tcg_gen_ld_i64(cpu_tmp1, cpu_env, s_offset);
2424
    tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
2425
}
2426

  
2427
/* Copy one 32-bit value between two locations addressed relative to
   cpu_env.  Unlike the 64-bit variants this goes through cpu_tmp2, which
   is paired here with the _i32 load/store ops.
   d_offset: destination byte offset from cpu_env.
   s_offset: source byte offset from cpu_env. */
static inline void gen_op_movl(int d_offset, int s_offset)
2428
{
2429
    tcg_gen_ld_i32(cpu_tmp2, cpu_env, s_offset);
2430
    tcg_gen_st_i32(cpu_tmp2, cpu_env, d_offset);
2431
}
2432

  
2433
/* Store a 64-bit zero at byte offset d_offset from cpu_env (cpu_tmp1 is
   loaded with the constant 0 and then stored).  In this file it is used
   to clear XMM_Q(1) of an xmm register after a 64-bit move. */
static inline void gen_op_movq_env_0(int d_offset)
2434
{
2435
    tcg_gen_movi_i64(cpu_tmp1, 0);
2436
    tcg_gen_st_i64(cpu_tmp1, cpu_env, d_offset);
2437
}
2415 2438

  
2416
#define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
2417
#define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
2418
                     gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
2439
#define SSE_SPECIAL ((void *)1)
2440
#define SSE_DUMMY ((void *)2)
2419 2441

  
2420
static GenOpFunc2 *sse_op_table1[256][4] = {
2442
#define MMX_OP2(x) { helper_ ## x ## _mmx, helper_ ## x ## _xmm }
2443
#define SSE_FOP(x) { helper_ ## x ## ps, helper_ ## x ## pd, \
2444
                     helper_ ## x ## ss, helper_ ## x ## sd, }
2445

  
2446
static void *sse_op_table1[256][4] = {
2421 2447
    /* 3DNow! extensions */
2422 2448
    [0x0e] = { SSE_DUMMY }, /* femms */
2423 2449
    [0x0f] = { SSE_DUMMY }, /* pf... */
......
2426 2452
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2427 2453
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2428 2454
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2429
    [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
2430
    [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
2455
    [0x14] = { helper_punpckldq_xmm, helper_punpcklqdq_xmm },
2456
    [0x15] = { helper_punpckhdq_xmm, helper_punpckhqdq_xmm },
2431 2457
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2432 2458
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2433 2459

  
......
2437 2463
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
2438 2464
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2439 2465
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2440
    [0x2e] = { gen_op_ucomiss, gen_op_ucomisd },
2441
    [0x2f] = { gen_op_comiss, gen_op_comisd },
2466
    [0x2e] = { helper_ucomiss, helper_ucomisd },
2467
    [0x2f] = { helper_comiss, helper_comisd },
2442 2468
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2443 2469
    [0x51] = SSE_FOP(sqrt),
2444
    [0x52] = { gen_op_rsqrtps, NULL, gen_op_rsqrtss, NULL },
2445
    [0x53] = { gen_op_rcpps, NULL, gen_op_rcpss, NULL },
2446
    [0x54] = { gen_op_pand_xmm, gen_op_pand_xmm }, /* andps, andpd */
2447
    [0x55] = { gen_op_pandn_xmm, gen_op_pandn_xmm }, /* andnps, andnpd */
2448
    [0x56] = { gen_op_por_xmm, gen_op_por_xmm }, /* orps, orpd */
2449
    [0x57] = { gen_op_pxor_xmm, gen_op_pxor_xmm }, /* xorps, xorpd */
2470
    [0x52] = { helper_rsqrtps, NULL, helper_rsqrtss, NULL },
2471
    [0x53] = { helper_rcpps, NULL, helper_rcpss, NULL },
2472
    [0x54] = { helper_pand_xmm, helper_pand_xmm }, /* andps, andpd */
2473
    [0x55] = { helper_pandn_xmm, helper_pandn_xmm }, /* andnps, andnpd */
2474
    [0x56] = { helper_por_xmm, helper_por_xmm }, /* orps, orpd */
2475
    [0x57] = { helper_pxor_xmm, helper_pxor_xmm }, /* xorps, xorpd */
2450 2476
    [0x58] = SSE_FOP(add),
2451 2477
    [0x59] = SSE_FOP(mul),
2452
    [0x5a] = { gen_op_cvtps2pd, gen_op_cvtpd2ps,
2453
               gen_op_cvtss2sd, gen_op_cvtsd2ss },
2454
    [0x5b] = { gen_op_cvtdq2ps, gen_op_cvtps2dq, gen_op_cvttps2dq },
2478
    [0x5a] = { helper_cvtps2pd, helper_cvtpd2ps,
2479
               helper_cvtss2sd, helper_cvtsd2ss },
2480
    [0x5b] = { helper_cvtdq2ps, helper_cvtps2dq, helper_cvttps2dq },
2455 2481
    [0x5c] = SSE_FOP(sub),
2456 2482
    [0x5d] = SSE_FOP(min),
2457 2483
    [0x5e] = SSE_FOP(div),
2458 2484
    [0x5f] = SSE_FOP(max),
2459 2485

  
2460 2486
    [0xc2] = SSE_FOP(cmpeq),
2461
    [0xc6] = { (GenOpFunc2 *)gen_op_shufps, (GenOpFunc2 *)gen_op_shufpd },
2487
    [0xc6] = { helper_shufps, helper_shufpd },
2462 2488

  
2463 2489
    /* MMX ops and their SSE extensions */
2464 2490
    [0x60] = MMX_OP2(punpcklbw),
......
2473 2499
    [0x69] = MMX_OP2(punpckhwd),
2474 2500
    [0x6a] = MMX_OP2(punpckhdq),
2475 2501
    [0x6b] = MMX_OP2(packssdw),
2476
    [0x6c] = { NULL, gen_op_punpcklqdq_xmm },
2477
    [0x6d] = { NULL, gen_op_punpckhqdq_xmm },
2502
    [0x6c] = { NULL, helper_punpcklqdq_xmm },
2503
    [0x6d] = { NULL, helper_punpckhqdq_xmm },
2478 2504
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2479 2505
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2480
    [0x70] = { (GenOpFunc2 *)gen_op_pshufw_mmx,
2481
               (GenOpFunc2 *)gen_op_pshufd_xmm,
2482
               (GenOpFunc2 *)gen_op_pshufhw_xmm,
2483
               (GenOpFunc2 *)gen_op_pshuflw_xmm },
2506
    [0x70] = { helper_pshufw_mmx,
2507
               helper_pshufd_xmm,
2508
               helper_pshufhw_xmm,
2509
               helper_pshuflw_xmm },
2484 2510
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2485 2511
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2486 2512
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
......
2488 2514
    [0x75] = MMX_OP2(pcmpeqw),
2489 2515
    [0x76] = MMX_OP2(pcmpeql),
2490 2516
    [0x77] = { SSE_DUMMY }, /* emms */
2491
    [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
2492
    [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
2517
    [0x7c] = { NULL, helper_haddpd, NULL, helper_haddps },
2518
    [0x7d] = { NULL, helper_hsubpd, NULL, helper_hsubps },
2493 2519
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2494 2520
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2495 2521
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2496 2522
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2497
    [0xd0] = { NULL, gen_op_addsubpd, NULL, gen_op_addsubps },
2523
    [0xd0] = { NULL, helper_addsubpd, NULL, helper_addsubps },
2498 2524
    [0xd1] = MMX_OP2(psrlw),
2499 2525
    [0xd2] = MMX_OP2(psrld),
2500 2526
    [0xd3] = MMX_OP2(psrlq),
......
2516 2542
    [0xe3] = MMX_OP2(pavgw),
2517 2543
    [0xe4] = MMX_OP2(pmulhuw),
2518 2544
    [0xe5] = MMX_OP2(pmulhw),
2519
    [0xe6] = { NULL, gen_op_cvttpd2dq, gen_op_cvtdq2pd, gen_op_cvtpd2dq },
2545
    [0xe6] = { NULL, helper_cvttpd2dq, helper_cvtdq2pd, helper_cvtpd2dq },
2520 2546
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
2521 2547
    [0xe8] = MMX_OP2(psubsb),
2522 2548
    [0xe9] = MMX_OP2(psubsw),
......
2543 2569
    [0xfe] = MMX_OP2(paddl),
2544 2570
};
2545 2571

  
2546
static GenOpFunc2 *sse_op_table2[3 * 8][2] = {
2572
static void *sse_op_table2[3 * 8][2] = {
2547 2573
    [0 + 2] = MMX_OP2(psrlw),
2548 2574
    [0 + 4] = MMX_OP2(psraw),
2549 2575
    [0 + 6] = MMX_OP2(psllw),
......
2551 2577
    [8 + 4] = MMX_OP2(psrad),
2552 2578
    [8 + 6] = MMX_OP2(pslld),
2553 2579
    [16 + 2] = MMX_OP2(psrlq),
2554
    [16 + 3] = { NULL, gen_op_psrldq_xmm },
2580
    [16 + 3] = { NULL, helper_psrldq_xmm },
2555 2581
    [16 + 6] = MMX_OP2(psllq),
2556
    [16 + 7] = { NULL, gen_op_pslldq_xmm },
2582
    [16 + 7] = { NULL, helper_pslldq_xmm },
2557 2583
};
2558 2584

  
2559
static GenOpFunc1 *sse_op_table3[4 * 3] = {
2560
    gen_op_cvtsi2ss,
2561
    gen_op_cvtsi2sd,
2562
    X86_64_ONLY(gen_op_cvtsq2ss),
2563
    X86_64_ONLY(gen_op_cvtsq2sd),
2564

  
2565
    gen_op_cvttss2si,
2566
    gen_op_cvttsd2si,
2567
    X86_64_ONLY(gen_op_cvttss2sq),
2568
    X86_64_ONLY(gen_op_cvttsd2sq),
2569

  
2570
    gen_op_cvtss2si,
2571
    gen_op_cvtsd2si,
2572
    X86_64_ONLY(gen_op_cvtss2sq),
2573
    X86_64_ONLY(gen_op_cvtsd2sq),
2585
/* Helpers for the scalar integer <-> float conversions, laid out as three
   groups of four entries.  gen_sse selects an entry with
       (s->dflag == 2) * 2 + ((b >> 8) - 2)
   inside a group, and adds 4 + (b & 1) * 4 to reach the second or third
   group.  The first group is called through tcg_gen_helper_0_2 with a
   pointer to the xmm register and the integer operand; the other two are
   called through tcg_gen_helper_1_1 and return the integer result.
   NOTE(review): X86_64_ONLY() is defined outside this view; presumably it
   yields NULL when TARGET_X86_64 is not set -- confirm. */
static void *sse_op_table3[4 * 3] = {
2586
    helper_cvtsi2ss, /* group 0: integer operand in, xmm register written */
2587
    helper_cvtsi2sd,
2588
    X86_64_ONLY(helper_cvtsq2ss),
2589
    X86_64_ONLY(helper_cvtsq2sd),
2590

  
2591
    helper_cvttss2si, /* group 1: selected when (b & 1) == 0 */
2592
    helper_cvttsd2si,
2593
    X86_64_ONLY(helper_cvttss2sq),
2594
    X86_64_ONLY(helper_cvttsd2sq),
2595

  
2596
    helper_cvtss2si, /* group 2: selected when (b & 1) == 1 */
2597
    helper_cvtsd2si,
2598
    X86_64_ONLY(helper_cvtss2sq),
2599
    X86_64_ONLY(helper_cvtsd2sq),
2574 2600
};
2575 2601

  
2576
static GenOpFunc2 *sse_op_table4[8][4] = {
2602
static void *sse_op_table4[8][4] = {
2577 2603
    SSE_FOP(cmpeq),
2578 2604
    SSE_FOP(cmplt),
2579 2605
    SSE_FOP(cmple),
......
2584 2610
    SSE_FOP(cmpord),
2585 2611
};
2586 2612

  
2587
static GenOpFunc2 *sse_op_table5[256] = {
2588
    [0x0c] = gen_op_pi2fw,
2589
    [0x0d] = gen_op_pi2fd,
2590
    [0x1c] = gen_op_pf2iw,
2591
    [0x1d] = gen_op_pf2id,
2592
    [0x8a] = gen_op_pfnacc,
2593
    [0x8e] = gen_op_pfpnacc,
2594
    [0x90] = gen_op_pfcmpge,
2595
    [0x94] = gen_op_pfmin,
2596
    [0x96] = gen_op_pfrcp,
2597
    [0x97] = gen_op_pfrsqrt,
2598
    [0x9a] = gen_op_pfsub,
2599
    [0x9e] = gen_op_pfadd,
2600
    [0xa0] = gen_op_pfcmpgt,
2601
    [0xa4] = gen_op_pfmax,
2602
    [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */
2603
    [0xa7] = gen_op_movq, /* pfrsqit1 */
2604
    [0xaa] = gen_op_pfsubr,
2605
    [0xae] = gen_op_pfacc,
2606
    [0xb0] = gen_op_pfcmpeq,
2607
    [0xb4] = gen_op_pfmul,
2608
    [0xb6] = gen_op_movq, /* pfrcpit2 */
2609
    [0xb7] = gen_op_pmulhrw_mmx,
2610
    [0xbb] = gen_op_pswapd,
2611
    [0xbf] = gen_op_pavgb_mmx /* pavgusb */
2613
/* Helper lookup table indexed by the byte `val` in gen_sse
   (sse_op2 = sse_op_table5[val]).  Slots that are not listed stay NULL,
   and gen_sse jumps to illegal_op for them.  The selected helper is called
   through tcg_gen_helper_0_2 with two pointers into the CPU state
   (cpu_env + op1_offset, cpu_env + op2_offset).
   NOTE(review): `val` is presumably the 3DNow! opcode suffix byte; the
   code that reads it is outside this view -- confirm. */
static void *sse_op_table5[256] = {
2614
    [0x0c] = helper_pi2fw,
2615
    [0x0d] = helper_pi2fd,
2616
    [0x1c] = helper_pf2iw,
2617
    [0x1d] = helper_pf2id,
2618
    [0x8a] = helper_pfnacc,
2619
    [0x8e] = helper_pfpnacc,
2620
    [0x90] = helper_pfcmpge,
2621
    [0x94] = helper_pfmin,
2622
    [0x96] = helper_pfrcp,
2623
    [0x97] = helper_pfrsqrt,
2624
    [0x9a] = helper_pfsub,
2625
    [0x9e] = helper_pfadd,
2626
    [0xa0] = helper_pfcmpgt,
2627
    [0xa4] = helper_pfmax,
2628
    [0xa6] = helper_movq, /* pfrcpit1; no need to actually increase precision */
2629
    [0xa7] = helper_movq, /* pfrsqit1 */
2630
    [0xaa] = helper_pfsubr,
2631
    [0xae] = helper_pfacc,
2632
    [0xb0] = helper_pfcmpeq,
2633
    [0xb4] = helper_pfmul,
2634
    [0xb6] = helper_movq, /* pfrcpit2 */
2635
    [0xb7] = helper_pmulhrw_mmx,
2636
    [0xbb] = helper_pswapd,
2637
    [0xbf] = helper_pavgb_mmx /* pavgusb */
2612 2638
};
2613 2639

  
2614 2640
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
2615 2641
{
2616 2642
    int b1, op1_offset, op2_offset, is_xmm, val, ot;
2617 2643
    int modrm, mod, rm, reg, reg_addr, offset_addr;
2618
    GenOpFunc2 *sse_op2;
2619
    GenOpFunc3 *sse_op3;
2644
    void *sse_op2;
2620 2645

  
2621 2646
    b &= 0xff;
2622 2647
    if (s->prefix & PREFIX_DATA)
......
2656 2681
        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
2657 2682
            goto illegal_op;
2658 2683
        /* femms */
2659
        gen_op_emms();
2684
        tcg_gen_helper_0_0(helper_emms);
2660 2685
        return;
2661 2686
    }
2662 2687
    if (b == 0x77) {
2663 2688
        /* emms */
2664
        gen_op_emms();
2689
        tcg_gen_helper_0_0(helper_emms);
2665 2690
        return;
2666 2691
    }
2667 2692
    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2668 2693
       the static cpu state) */
2669 2694
    if (!is_xmm) {
2670
        gen_op_enter_mmx();
2695
        tcg_gen_helper_0_0(helper_enter_mmx);
2671 2696
    }
2672 2697

  
2673 2698
    modrm = ldub_code(s->pc++);
......
2697 2722
#ifdef TARGET_X86_64
2698 2723
            if (s->dflag == 2) {
2699 2724
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2700
                gen_op_movq_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
2725
                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
2701 2726
            } else
2702 2727
#endif
2703 2728
            {
2704 2729
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2705
                gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
2730
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
2731
                                 offsetof(CPUX86State,fpregs[reg].mmx));
2732
                tcg_gen_helper_0_2(helper_movl_mm_T0_mmx, cpu_ptr0, cpu_T[0]);
2706 2733
            }
2707 2734
            break;
2708 2735
        case 0x16e: /* movd xmm, ea */
2709 2736
#ifdef TARGET_X86_64
2710 2737
            if (s->dflag == 2) {
2711 2738
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
2712
                gen_op_movq_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
2739
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
2740
                                 offsetof(CPUX86State,xmm_regs[reg]));
2741
                tcg_gen_helper_0_2(helper_movq_mm_T0_xmm, cpu_ptr0, cpu_T[0]);
2713 2742
            } else
2714 2743
#endif
2715 2744
            {
2716 2745
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
2717
                gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
2746
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
2747
                                 offsetof(CPUX86State,xmm_regs[reg]));
2748
                tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]);
2749
                tcg_gen_helper_0_2(helper_movl_mm_T0_xmm, cpu_ptr0, cpu_tmp2);
2718 2750
            }
2719 2751
            break;
2720 2752
        case 0x6f: /* movq mm, ea */
......
2723 2755
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
2724 2756
            } else {
2725 2757
                rm = (modrm & 7);
2726
                gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
2727
                            offsetof(CPUX86State,fpregs[rm].mmx));
2758
                tcg_gen_ld_i64(cpu_tmp1, cpu_env,
2759
                               offsetof(CPUX86State,fpregs[rm].mmx));
2760
                tcg_gen_st_i64(cpu_tmp1, cpu_env,
2761
                               offsetof(CPUX86State,fpregs[reg].mmx));
2728 2762
            }
2729 2763
            break;
2730 2764
        case 0x010: /* movups */
......
2841 2875
        case 0x7e: /* movd ea, mm */
2842 2876
#ifdef TARGET_X86_64
2843 2877
            if (s->dflag == 2) {
2844
                gen_op_movq_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
2878
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
2879
                               offsetof(CPUX86State,fpregs[reg].mmx));
2845 2880
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2846 2881
            } else
2847 2882
#endif
2848 2883
            {
2849
                gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
2884
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
2885
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
2850 2886
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2851 2887
            }
2852 2888
            break;
2853 2889
        case 0x17e: /* movd ea, xmm */
2854 2890
#ifdef TARGET_X86_64
2855 2891
            if (s->dflag == 2) {
2856
                gen_op_movq_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
2892
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
2893
                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
2857 2894
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
2858 2895
            } else
2859 2896
#endif
2860 2897
            {
2861
                gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
2898
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
2899
                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
2862 2900
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
2863 2901
            }
2864 2902
            break;
......
2967 3005
                rm = (modrm & 7);
2968 3006
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2969 3007
            }
2970
            sse_op2(op2_offset, op1_offset);
3008
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3009
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3010
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
2971 3011
            break;
2972 3012
        case 0x050: /* movmskps */
2973 3013
            rm = (modrm & 7) | REX_B(s);
2974
            gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm]));
3014
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3015
                             offsetof(CPUX86State,xmm_regs[rm]));
3016
            tcg_gen_helper_1_1(helper_movmskps, cpu_tmp2, cpu_ptr0);
3017
            tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
2975 3018
            gen_op_mov_reg_T0(OT_LONG, reg);
2976 3019
            break;
2977 3020
        case 0x150: /* movmskpd */
2978 3021
            rm = (modrm & 7) | REX_B(s);
2979
            gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm]));
3022
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3023
                             offsetof(CPUX86State,xmm_regs[rm]));
3024
            tcg_gen_helper_1_1(helper_movmskpd, cpu_tmp2, cpu_ptr0);
3025
            tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
2980 3026
            gen_op_mov_reg_T0(OT_LONG, reg);
2981 3027
            break;
2982 3028
        case 0x02a: /* cvtpi2ps */
2983 3029
        case 0x12a: /* cvtpi2pd */
2984
            gen_op_enter_mmx();
3030
            tcg_gen_helper_0_0(helper_enter_mmx);
2985 3031
            if (mod != 3) {
2986 3032
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
2987 3033
                op2_offset = offsetof(CPUX86State,mmx_t0);
......
2991 3037
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
2992 3038
            }
2993 3039
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3040
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3041
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
2994 3042
            switch(b >> 8) {
2995 3043
            case 0x0:
2996
                gen_op_cvtpi2ps(op1_offset, op2_offset);
3044
                tcg_gen_helper_0_2(helper_cvtpi2ps, cpu_ptr0, cpu_ptr1);
2997 3045
                break;
2998 3046
            default:
2999 3047
            case 0x1:
3000
                gen_op_cvtpi2pd(op1_offset, op2_offset);
3048
                tcg_gen_helper_0_2(helper_cvtpi2pd, cpu_ptr0, cpu_ptr1);
3001 3049
                break;
3002 3050
            }
3003 3051
            break;
......
3006 3054
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3007 3055
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3008 3056
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3009
            sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)](op1_offset);
3057
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3058
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
3059
            tcg_gen_trunc_tl_i32(cpu_tmp2, cpu_T[0]);
3060
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_tmp2);
3010 3061
            break;
3011 3062
        case 0x02c: /* cvttps2pi */
3012 3063
        case 0x12c: /* cvttpd2pi */
3013 3064
        case 0x02d: /* cvtps2pi */
3014 3065
        case 0x12d: /* cvtpd2pi */
3015
            gen_op_enter_mmx();
3066
            tcg_gen_helper_0_0(helper_enter_mmx);
3016 3067
            if (mod != 3) {
3017 3068
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3018 3069
                op2_offset = offsetof(CPUX86State,xmm_t0);
......
3022 3073
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3023 3074
            }
3024 3075
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3076
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3077
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3025 3078
            switch(b) {
3026 3079
            case 0x02c:
3027
                gen_op_cvttps2pi(op1_offset, op2_offset);
3080
                tcg_gen_helper_0_2(helper_cvttps2pi, cpu_ptr0, cpu_ptr1);
3028 3081
                break;
3029 3082
            case 0x12c:
3030
                gen_op_cvttpd2pi(op1_offset, op2_offset);
3083
                tcg_gen_helper_0_2(helper_cvttpd2pi, cpu_ptr0, cpu_ptr1);
3031 3084
                break;
3032 3085
            case 0x02d:
3033
                gen_op_cvtps2pi(op1_offset, op2_offset);
3086
                tcg_gen_helper_0_2(helper_cvtps2pi, cpu_ptr0, cpu_ptr1);
3034 3087
                break;
3035 3088
            case 0x12d:
3036
                gen_op_cvtpd2pi(op1_offset, op2_offset);
3089
                tcg_gen_helper_0_2(helper_cvtpd2pi, cpu_ptr0, cpu_ptr1);
3037 3090
                break;
3038 3091
            }
3039 3092
            break;
......
3055 3108
                rm = (modrm & 7) | REX_B(s);
3056 3109
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3057 3110
            }
3058
            sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
3059
                          (b & 1) * 4](op2_offset);
3111
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
3112
                                    (b & 1) * 4];
3113
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3114
            if (ot == OT_LONG) {
3115
                tcg_gen_helper_1_1(sse_op2, cpu_tmp2, cpu_ptr0);
3116
                tcg_gen_extu_i32_i64(cpu_T[0], cpu_tmp2);
3117
            } else {
3118
                tcg_gen_helper_1_1(sse_op2, cpu_T[0], cpu_ptr0);
3119
            }
3060 3120
            gen_op_mov_reg_T0(ot, reg);
3061 3121
            break;
3062 3122
        case 0xc4: /* pinsrw */
......
3066 3126
            val = ldub_code(s->pc++);
3067 3127
            if (b1) {
3068 3128
                val &= 7;
3069
                gen_op_pinsrw_xmm(offsetof(CPUX86State,xmm_regs[reg]), val);
3129
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3130
                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3070 3131
            } else {
3071 3132
                val &= 3;
3072
                gen_op_pinsrw_mmx(offsetof(CPUX86State,fpregs[reg].mmx), val);
3133
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3134
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3073 3135
            }
3074 3136
            break;
3075 3137
        case 0xc5: /* pextrw */
......
3080 3142
            if (b1) {
3081 3143
                val &= 7;
3082 3144
                rm = (modrm & 7) | REX_B(s);
3083
                gen_op_pextrw_xmm(offsetof(CPUX86State,xmm_regs[rm]), val);
3145
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3146
                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3084 3147
            } else {
3085 3148
                val &= 3;
3086 3149
                rm = (modrm & 7);
3087
                gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val);
3150
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3151
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3088 3152
            }
3089 3153
            reg = ((modrm >> 3) & 7) | rex_r;
3090 3154
            gen_op_mov_reg_T0(OT_LONG, reg);
......
3101 3165
            }
3102 3166
            break;
3103 3167
        case 0x2d6: /* movq2dq */
3104
            gen_op_enter_mmx();
3168
            tcg_gen_helper_0_0(helper_enter_mmx);
3105 3169
            rm = (modrm & 7);
3106 3170
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3107 3171
                        offsetof(CPUX86State,fpregs[rm].mmx));
3108 3172
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3109 3173
            break;
3110 3174
        case 0x3d6: /* movdq2q */
3111
            gen_op_enter_mmx();
3175
            tcg_gen_helper_0_0(helper_enter_mmx);
3112 3176
            rm = (modrm & 7) | REX_B(s);
3113 3177
            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3114 3178
                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
......
3119 3183
                goto illegal_op;
3120 3184
            if (b1) {
3121 3185
                rm = (modrm & 7) | REX_B(s);
3122
                gen_op_pmovmskb_xmm(offsetof(CPUX86State,xmm_regs[rm]));
3186
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3187
                tcg_gen_helper_1_1(helper_pmovmskb_xmm, cpu_tmp2, cpu_ptr0);
3123 3188
            } else {
3124 3189
                rm = (modrm & 7);
3125
                gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx));
3190
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3191
                tcg_gen_helper_1_1(helper_pmovmskb_mmx, cpu_tmp2, cpu_ptr0);
3126 3192
            }
3193
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2);
3127 3194
            reg = ((modrm >> 3) & 7) | rex_r;
3128 3195
            gen_op_mov_reg_T0(OT_LONG, reg);
3129 3196
            break;
......
3199 3266
            sse_op2 = sse_op_table5[val];
3200 3267
            if (!sse_op2)
3201 3268
                goto illegal_op;
3202
            sse_op2(op1_offset, op2_offset);
3269
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3270
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3271
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3203 3272
            break;
3204 3273
        case 0x70: /* pshufx insn */
3205 3274
        case 0xc6: /* pshufx insn */
3206 3275
            val = ldub_code(s->pc++);
3207
            sse_op3 = (GenOpFunc3 *)sse_op2;
3208
            sse_op3(op1_offset, op2_offset, val);
3276
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3277
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3278
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
3209 3279
            break;
3210 3280
        case 0xc2:
3211 3281
            /* compare insns */
......
3213 3283
            if (val >= 8)
3214 3284
                goto illegal_op;
3215 3285
            sse_op2 = sse_op_table4[val][b1];
3216
            sse_op2(op1_offset, op2_offset);
3286
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3287
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3288
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3217 3289
            break;
3218 3290
        default:
3219
            sse_op2(op1_offset, op2_offset);
3291
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3292
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3293
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
3220 3294
            break;
3221 3295
        }
3222 3296
        if (b == 0x2e || b == 0x2f) {
3297
            /* just to keep the EFLAGS optimization correct */
3298
            gen_op_com_dummy();
3223 3299
            s->cc_op = CC_OP_EFLAGS;
3224 3300
        }
3225 3301
    }
......
6485 6561
    X86_64_DEF([INDEX_op_imulq_T0_T1] = CC_OSZAPC,)
6486 6562

  
6487 6563
    /* sse */
6488
    [INDEX_op_ucomiss] = CC_OSZAPC,
6489
    [INDEX_op_ucomisd] = CC_OSZAPC,
6490
    [INDEX_op_comiss] = CC_OSZAPC,
6491
    [INDEX_op_comisd] = CC_OSZAPC,
6564
    [INDEX_op_com_dummy] = CC_OSZAPC,
6565
    [INDEX_op_com_dummy] = CC_OSZAPC,
6566
    [INDEX_op_com_dummy] = CC_OSZAPC,
6567
    [INDEX_op_com_dummy] = CC_OSZAPC,
6492 6568

  
6493 6569
    /* bcd */
6494 6570
    [INDEX_op_aam] = CC_OSZAPC,
......
6792 6868
#if TARGET_LONG_BITS > HOST_LONG_BITS
6793 6869
    cpu_tmp1 = tcg_temp_new(TCG_TYPE_I64);
6794 6870
#endif
6871
    cpu_tmp2 = tcg_temp_new(TCG_TYPE_I32);
6872
    cpu_ptr0 = tcg_temp_new(TCG_TYPE_PTR);
6873
    cpu_ptr1 = tcg_temp_new(TCG_TYPE_PTR);
6795 6874

  
6796 6875
    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
6797 6876

  

Also available in: Unified diff