Revision 5d8a4f8f

b/configure
2643 2643
  cflags="-I\$(SRC_PATH)/tcg/sparc $cflags"
2644 2644
elif test "$ARCH" = "s390x" ; then
2645 2645
  cflags="-I\$(SRC_PATH)/tcg/s390 $cflags"
2646
elif test "$ARCH" = "x86_64" ; then
2647
  cflags="-I\$(SRC_PATH)/tcg/i386 $cflags"
2646 2648
else
2647 2649
  cflags="-I\$(SRC_PATH)/tcg/\$(ARCH) $cflags"
2648 2650
fi
b/tcg/i386/tcg-target.c
24 24

  
25 25
#ifndef NDEBUG
26 26
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27
    "%eax",
28
    "%ecx",
29
    "%edx",
30
    "%ebx",
31
    "%esp",
32
    "%ebp",
33
    "%esi",
34
    "%edi",
27
#if TCG_TARGET_REG_BITS == 64
28
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30
#else
31
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32
#endif
35 33
};
36 34
#endif
37 35

  
38 36
static const int tcg_target_reg_alloc_order[] = {
37
#if TCG_TARGET_REG_BITS == 64
38
    TCG_REG_RBP,
39
    TCG_REG_RBX,
40
    TCG_REG_R12,
41
    TCG_REG_R13,
42
    TCG_REG_R14,
43
    TCG_REG_R15,
44
    TCG_REG_R10,
45
    TCG_REG_R11,
46
    TCG_REG_R9,
47
    TCG_REG_R8,
48
    TCG_REG_RCX,
49
    TCG_REG_RDX,
50
    TCG_REG_RSI,
51
    TCG_REG_RDI,
52
    TCG_REG_RAX,
53
#else
39 54
    TCG_REG_EBX,
40 55
    TCG_REG_ESI,
41 56
    TCG_REG_EDI,
......
43 58
    TCG_REG_ECX,
44 59
    TCG_REG_EDX,
45 60
    TCG_REG_EAX,
61
#endif
46 62
};
47 63

  
48
static const int tcg_target_call_iarg_regs[3] = { TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX };
49
static const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX };
64
static const int tcg_target_call_iarg_regs[] = {
65
#if TCG_TARGET_REG_BITS == 64
66
    TCG_REG_RDI,
67
    TCG_REG_RSI,
68
    TCG_REG_RDX,
69
    TCG_REG_RCX,
70
    TCG_REG_R8,
71
    TCG_REG_R9,
72
#else
73
    TCG_REG_EAX,
74
    TCG_REG_EDX,
75
    TCG_REG_ECX
76
#endif
77
};
78

  
79
static const int tcg_target_call_oarg_regs[2] = {
80
    TCG_REG_EAX,
81
    TCG_REG_EDX
82
};
50 83

  
51 84
static uint8_t *tb_ret_addr;
52 85

  
......
55 88
{
56 89
    value += addend;
57 90
    switch(type) {
58
    case R_386_32:
59
        *(uint32_t *)code_ptr = value;
60
        break;
61 91
    case R_386_PC32:
62
        *(uint32_t *)code_ptr = value - (long)code_ptr;
92
        value -= (uintptr_t)code_ptr;
93
        if (value != (int32_t)value) {
94
            tcg_abort();
95
        }
96
        *(uint32_t *)code_ptr = value;
63 97
        break;
64 98
    case R_386_PC8:
65
        value -= (long)code_ptr;
99
        value -= (uintptr_t)code_ptr;
66 100
        if (value != (int8_t)value) {
67 101
            tcg_abort();
68 102
        }
......
76 110
/* maximum number of registers used for input function arguments */
77 111
static inline int tcg_target_get_call_iarg_regs_count(int flags)
78 112
{
113
    if (TCG_TARGET_REG_BITS == 64) {
114
        return 6;
115
    }
116

  
79 117
    flags &= TCG_CALL_TYPE_MASK;
80 118
    switch(flags) {
81 119
    case TCG_CALL_TYPE_STD:
......
122 160
        break;
123 161
    case 'q':
124 162
        ct->ct |= TCG_CT_REG;
125
        tcg_regset_set32(ct->u.regs, 0, 0xf);
163
        if (TCG_TARGET_REG_BITS == 64) {
164
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
165
        } else {
166
            tcg_regset_set32(ct->u.regs, 0, 0xf);
167
        }
126 168
        break;
127 169
    case 'r':
128 170
        ct->ct |= TCG_CT_REG;
129
        tcg_regset_set32(ct->u.regs, 0, 0xff);
171
        if (TCG_TARGET_REG_BITS == 64) {
172
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
173
        } else {
174
            tcg_regset_set32(ct->u.regs, 0, 0xff);
175
        }
130 176
        break;
131 177

  
132 178
        /* qemu_ld/st address constraint */
133 179
    case 'L':
134 180
        ct->ct |= TCG_CT_REG;
135
        tcg_regset_set32(ct->u.regs, 0, 0xff);
136
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
137
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
181
        if (TCG_TARGET_REG_BITS == 64) {
182
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
183
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
184
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
185
        } else {
186
            tcg_regset_set32(ct->u.regs, 0, 0xff);
187
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
188
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
189
        }
190
        break;
191

  
192
    case 'e':
193
        ct->ct |= TCG_CT_CONST_S32;
194
        break;
195
    case 'Z':
196
        ct->ct |= TCG_CT_CONST_U32;
138 197
        break;
198

  
139 199
    default:
140 200
        return -1;
141 201
    }
......
148 208
static inline int tcg_target_const_match(tcg_target_long val,
149 209
                                         const TCGArgConstraint *arg_ct)
150 210
{
151
    int ct;
152
    ct = arg_ct->ct;
153
    if (ct & TCG_CT_CONST)
211
    int ct = arg_ct->ct;
212
    if (ct & TCG_CT_CONST) {
154 213
        return 1;
155
    else
156
        return 0;
214
    }
215
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
216
        return 1;
217
    }
218
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
219
        return 1;
220
    }
221
    return 0;
157 222
}
158 223

  
224
#if TCG_TARGET_REG_BITS == 64
225
# define LOWREGMASK(x)	((x) & 7)
226
#else
227
# define LOWREGMASK(x)	(x)
228
#endif
229

  
159 230
#define P_EXT		0x100		/* 0x0f opcode prefix */
160 231
#define P_DATA16	0x200		/* 0x66 opcode prefix */
232
#if TCG_TARGET_REG_BITS == 64
233
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
234
# define P_REXW		0x800		/* Set REX.W = 1 */
235
# define P_REXB_R	0x1000		/* REG field as byte register */
236
# define P_REXB_RM	0x2000		/* R/M field as byte register */
237
#else
238
# define P_ADDR32	0
239
# define P_REXW		0
240
# define P_REXB_R	0
241
# define P_REXB_RM	0
242
#endif
161 243

  
162 244
#define OPC_ARITH_EvIz	(0x81)
163 245
#define OPC_ARITH_EvIb	(0x83)
......
179 261
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
180 262
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
181 263
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
264
#define OPC_MOVL_EvIz	(0xc7)
182 265
#define OPC_MOVL_Iv     (0xb8)
183 266
#define OPC_MOVSBL	(0xbe | P_EXT)
184 267
#define OPC_MOVSWL	(0xbf | P_EXT)
268
#define OPC_MOVSLQ	(0x63 | P_REXW)
185 269
#define OPC_MOVZBL	(0xb6 | P_EXT)
186 270
#define OPC_MOVZWL	(0xb7 | P_EXT)
187 271
#define OPC_POP_r32	(0x58)
......
189 273
#define OPC_PUSH_Iv	(0x68)
190 274
#define OPC_PUSH_Ib	(0x6a)
191 275
#define OPC_RET		(0xc3)
192
#define OPC_SETCC	(0x90 | P_EXT)	/* ... plus condition code */
276
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
193 277
#define OPC_SHIFT_1	(0xd1)
194 278
#define OPC_SHIFT_Ib	(0xc1)
195 279
#define OPC_SHIFT_cl	(0xd3)
......
226 310
#define EXT3_IDIV  7
227 311

  
228 312
/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
313
#define EXT5_INC_Ev	0
314
#define EXT5_DEC_Ev	1
229 315
#define EXT5_CALLN_Ev	2
230 316
#define EXT5_JMPN_Ev	4
231 317

  
......
261 347
    [TCG_COND_GTU] = JCC_JA,
262 348
};
263 349

  
264
static inline void tcg_out_opc(TCGContext *s, int opc)
350
#if TCG_TARGET_REG_BITS == 64
351
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
352
{
353
    int rex;
354

  
355
    if (opc & P_DATA16) {
356
        /* We should never be asking for both 16 and 64-bit operation.  */
357
        assert((opc & P_REXW) == 0);
358
        tcg_out8(s, 0x66);
359
    }
360
    if (opc & P_ADDR32) {
361
        tcg_out8(s, 0x67);
362
    }
363

  
364
    rex = 0;
365
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
366
    rex |= (r & 8) >> 1;		/* REX.R */
367
    rex |= (x & 8) >> 2;		/* REX.X */
368
    rex |= (rm & 8) >> 3;		/* REX.B */
369

  
370
    /* P_REXB_{R,RM} indicates that the given register is the low byte.
371
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
372
       as otherwise the encoding indicates %[abcd]h.  Note that the values
373
       that are ORed in merely indicate that the REX byte must be present;
374
       those bits get discarded in output.  */
375
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
376
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
377

  
378
    if (rex) {
379
        tcg_out8(s, (uint8_t)(rex | 0x40));
380
    }
381

  
382
    if (opc & P_EXT) {
383
        tcg_out8(s, 0x0f);
384
    }
385
    tcg_out8(s, opc);
386
}
387
#else
388
static void tcg_out_opc(TCGContext *s, int opc)
265 389
{
266 390
    if (opc & P_DATA16) {
267 391
        tcg_out8(s, 0x66);
......
271 395
    }
272 396
    tcg_out8(s, opc);
273 397
}
398
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
399
   the 32-bit compilation paths.  This method works with all versions of gcc,
400
   whereas relying on optimization may not be able to exclude them.  */
401
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
402
#endif
274 403

  
275
static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
404
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
276 405
{
277
    tcg_out_opc(s, opc);
278
    tcg_out8(s, 0xc0 | (r << 3) | rm);
406
    tcg_out_opc(s, opc, r, rm, 0);
407
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
279 408
}
280 409

  
281 410
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
282
   We handle either RM and INDEX missing with a -1 value.  */
411
   We handle RM and/or INDEX being missing with a negative value.  In 64-bit
412
   mode for absolute addresses, ~RM is the size of the immediate operand
413
   that will follow the instruction.  */
283 414

  
284 415
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
285
                                     int index, int shift, int32_t offset)
416
                                     int index, int shift,
417
                                     tcg_target_long offset)
286 418
{
287 419
    int mod, len;
288 420

  
289
    if (index == -1 && rm == -1) {
290
        /* Absolute address.  */
291
        tcg_out_opc(s, opc);
292
        tcg_out8(s, (r << 3) | 5);
293
        tcg_out32(s, offset);
294
        return;
295
    }
421
    if (index < 0 && rm < 0) {
422
        if (TCG_TARGET_REG_BITS == 64) {
423
            /* Try for a rip-relative addressing mode.  This has replaced
424
               the 32-bit-mode absolute addressing encoding.  */
425
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
426
            tcg_target_long disp = offset - pc;
427
            if (disp == (int32_t)disp) {
428
                tcg_out_opc(s, opc, r, 0, 0);
429
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
430
                tcg_out32(s, disp);
431
                return;
432
            }
296 433

  
297
    tcg_out_opc(s, opc);
434
            /* Try for an absolute address encoding.  This requires the
435
               use of the MODRM+SIB encoding and is therefore larger than
436
               rip-relative addressing.  */
437
            if (offset == (int32_t)offset) {
438
                tcg_out_opc(s, opc, r, 0, 0);
439
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
440
                tcg_out8(s, (4 << 3) | 5);
441
                tcg_out32(s, offset);
442
                return;
443
            }
444

  
445
            /* ??? The memory isn't directly addressable.  */
446
            tcg_abort();
447
        } else {
448
            /* Absolute address.  */
449
            tcg_out_opc(s, opc, r, 0, 0);
450
            tcg_out8(s, (r << 3) | 5);
451
            tcg_out32(s, offset);
452
            return;
453
        }
454
    }
298 455

  
299 456
    /* Find the length of the immediate addend.  Note that the encoding
300 457
       that would be used for (%ebp) indicates absolute addressing.  */
301
    if (rm == -1) {
458
    if (rm < 0) {
302 459
        mod = 0, len = 4, rm = 5;
303
    } else if (offset == 0 && rm != TCG_REG_EBP) {
460
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
304 461
        mod = 0, len = 0;
305 462
    } else if (offset == (int8_t)offset) {
306 463
        mod = 0x40, len = 1;
......
310 467

  
311 468
    /* Use a single byte MODRM format if possible.  Note that the encoding
312 469
       that would be used for %esp is the escape to the two byte form.  */
313
    if (index == -1 && rm != TCG_REG_ESP) {
470
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
314 471
        /* Single byte MODRM format.  */
315
        tcg_out8(s, mod | (r << 3) | rm);
472
        tcg_out_opc(s, opc, r, rm, 0);
473
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
316 474
    } else {
317 475
        /* Two byte MODRM+SIB format.  */
318 476

  
319 477
        /* Note that the encoding that would place %esp into the index
320
           field indicates no index register.  */
321
        if (index == -1) {
478
           field indicates no index register.  In 64-bit mode, the REX.X
479
           bit counts, so %r12 can be used as the index.  */
480
        if (index < 0) {
322 481
            index = 4;
323 482
        } else {
324 483
            assert(index != TCG_REG_ESP);
325 484
        }
326 485

  
327
        tcg_out8(s, mod | (r << 3) | 4);
328
        tcg_out8(s, (shift << 6) | (index << 3) | rm);
486
        tcg_out_opc(s, opc, r, rm, index);
487
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
488
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
329 489
    }
330 490

  
331 491
    if (len == 1) {
......
335 495
    }
336 496
}
337 497

  
338
/* rm == -1 means no register index */
339
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
340
                                        int32_t offset)
498
/* A simplification of the above with no index or shift.  */
499
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
500
                                        int rm, tcg_target_long offset)
341 501
{
342 502
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
343 503
}
......
345 505
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
346 506
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
347 507
{
348
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
508
    /* Propagate an opcode prefix, such as P_REXW.  */
509
    int ext = subop & ~0x7;
510
    subop &= 0x7;
511

  
512
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
349 513
}
350 514

  
351 515
static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
352 516
{
353 517
    if (arg != ret) {
354
        tcg_out_modrm(s, OPC_MOVL_GvEv, ret, arg);
518
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
519
        tcg_out_modrm(s, opc, ret, arg);
355 520
    }
356 521
}
357 522

  
358
static inline void tcg_out_movi(TCGContext *s, TCGType type,
359
                                int ret, int32_t arg)
523
static void tcg_out_movi(TCGContext *s, TCGType type,
524
                         int ret, tcg_target_long arg)
360 525
{
361 526
    if (arg == 0) {
362 527
        tgen_arithr(s, ARITH_XOR, ret, ret);
528
        return;
529
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
530
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
531
        tcg_out32(s, arg);
532
    } else if (arg == (int32_t)arg) {
533
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
534
        tcg_out32(s, arg);
363 535
    } else {
364
        tcg_out8(s, OPC_MOVL_Iv + ret);
536
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
365 537
        tcg_out32(s, arg);
538
        tcg_out32(s, arg >> 31 >> 1);
366 539
    }
367 540
}
368 541

  
369 542
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
370 543
{
371 544
    if (val == (int8_t)val) {
372
        tcg_out_opc(s, OPC_PUSH_Ib);
545
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
373 546
        tcg_out8(s, val);
374
    } else {
375
        tcg_out_opc(s, OPC_PUSH_Iv);
547
    } else if (val == (int32_t)val) {
548
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
376 549
        tcg_out32(s, val);
550
    } else {
551
        tcg_abort();
377 552
    }
378 553
}
379 554

  
380 555
static inline void tcg_out_push(TCGContext *s, int reg)
381 556
{
382
    tcg_out_opc(s, OPC_PUSH_r32 + reg);
557
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
383 558
}
384 559

  
385 560
static inline void tcg_out_pop(TCGContext *s, int reg)
386 561
{
387
    tcg_out_opc(s, OPC_POP_r32 + reg);
562
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
388 563
}
389 564

  
390 565
static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
391 566
                              int arg1, tcg_target_long arg2)
392 567
{
393
    tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
568
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
569
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
394 570
}
395 571

  
396 572
static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
397 573
                              int arg1, tcg_target_long arg2)
398 574
{
399
    tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
575
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
576
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
400 577
}
401 578

  
402 579
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
......
406 583
    subopc &= 0x7;
407 584

  
408 585
    if (count == 1) {
409
        tcg_out_modrm(s, OPC_SHIFT_1 | ext, subopc, reg);
586
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
410 587
    } else {
411
        tcg_out_modrm(s, OPC_SHIFT_Ib | ext, subopc, reg);
588
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
412 589
        tcg_out8(s, count);
413 590
    }
414 591
}
415 592

  
416 593
static inline void tcg_out_bswap32(TCGContext *s, int reg)
417 594
{
418
    tcg_out_opc(s, OPC_BSWAP + reg);
595
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
419 596
}
420 597

  
421 598
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
422 599
{
423
    tcg_out_shifti(s, SHIFT_ROL | P_DATA16, reg, 8);
600
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
424 601
}
425 602

  
426 603
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
427 604
{
428 605
    /* movzbl */
429
    assert(src < 4);
430
    tcg_out_modrm(s, OPC_MOVZBL, dest, src);
606
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
607
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
431 608
}
432 609

  
433
static void tcg_out_ext8s(TCGContext *s, int dest, int src)
610
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
434 611
{
435 612
    /* movsbl */
436
    assert(src < 4);
437
    tcg_out_modrm(s, OPC_MOVSBL, dest, src);
613
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
614
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
438 615
}
439 616

  
440 617
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
......
443 620
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
444 621
}
445 622

  
446
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
623
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
447 624
{
448
    /* movswl */
449
    tcg_out_modrm(s, OPC_MOVSWL, dest, src);
625
    /* movsw[lq] */
626
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
450 627
}
451 628

  
452
static inline void tgen_arithi(TCGContext *s, int c, int r0,
453
                               int32_t val, int cf)
629
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
454 630
{
631
    /* 32-bit mov zero extends.  */
632
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
633
}
634

  
635
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
636
{
637
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
638
}
639

  
640
static inline void tcg_out_bswap64(TCGContext *s, int reg)
641
{
642
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
643
}
644

  
645
static void tgen_arithi(TCGContext *s, int c, int r0,
646
                        tcg_target_long val, int cf)
647
{
648
    int rexw = 0;
649

  
650
    if (TCG_TARGET_REG_BITS == 64) {
651
        rexw = c & -8;
652
        c &= 7;
653
    }
654

  
455 655
    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
456 656
       partial flags update stalls on Pentium4 and are not recommended
457 657
       by current Intel optimization manuals.  */
458 658
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
459
        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
460
        tcg_out_opc(s, opc + r0);
461
    } else if (val == (int8_t)val) {
462
        tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
659
        _Bool is_inc = (c == ARITH_ADD) ^ (val < 0);
660
        if (TCG_TARGET_REG_BITS == 64) {
661
            /* The single-byte increment encodings are re-tasked as the
662
               REX prefixes.  Use the MODRM encoding.  */
663
            tcg_out_modrm(s, OPC_GRP5 + rexw,
664
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
665
        } else {
666
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
667
        }
668
        return;
669
    }
670

  
671
    if (c == ARITH_AND) {
672
        if (TCG_TARGET_REG_BITS == 64) {
673
            if (val == 0xffffffffu) {
674
                tcg_out_ext32u(s, r0, r0);
675
                return;
676
            }
677
            if (val == (uint32_t)val) {
678
                /* AND with no high bits set can use a 32-bit operation.  */
679
                rexw = 0;
680
            }
681
        }
682
        if (val == 0xffu) {
683
            tcg_out_ext8u(s, r0, r0);
684
            return;
685
        }
686
        if (val == 0xffffu) {
687
            tcg_out_ext16u(s, r0, r0);
688
            return;
689
        }
690
    }
691

  
692
    if (val == (int8_t)val) {
693
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
463 694
        tcg_out8(s, val);
464
    } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
465
        tcg_out_ext8u(s, r0, r0);
466
    } else if (c == ARITH_AND && val == 0xffffu) {
467
        tcg_out_ext16u(s, r0, r0);
468
    } else {
469
        tcg_out_modrm(s, OPC_ARITH_EvIz, c, r0);
695
        return;
696
    }
697
    if (rexw == 0 || val == (int32_t)val) {
698
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
470 699
        tcg_out32(s, val);
700
        return;
471 701
    }
702

  
703
    tcg_abort();
472 704
}
473 705

  
474 706
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
475 707
{
476
    if (val != 0)
477
        tgen_arithi(s, ARITH_ADD, reg, val, 0);
708
    if (val != 0) {
709
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
710
    }
478 711
}
479 712

  
480 713
/* Use SMALL != 0 to force a short forward branch.  */
......
501 734
                tcg_out8(s, OPC_JMP_long);
502 735
                tcg_out32(s, val - 5);
503 736
            } else {
504
                tcg_out_opc(s, OPC_JCC_long + opc);
737
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
505 738
                tcg_out32(s, val - 6);
506 739
            }
507 740
        }
......
517 750
        if (opc == -1) {
518 751
            tcg_out8(s, OPC_JMP_long);
519 752
        } else {
520
            tcg_out_opc(s, OPC_JCC_long + opc);
753
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
521 754
        }
522 755
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
523 756
        s->code_ptr += 4;
......
525 758
}
526 759

  
527 760
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
528
                        int const_arg2)
761
                        int const_arg2, int rexw)
529 762
{
530 763
    if (const_arg2) {
531 764
        if (arg2 == 0) {
532 765
            /* test r, r */
533
            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
766
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
534 767
        } else {
535
            tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
768
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
536 769
        }
537 770
    } else {
538
        tgen_arithr(s, ARITH_CMP, arg1, arg2);
771
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
539 772
    }
540 773
}
541 774

  
542
static void tcg_out_brcond(TCGContext *s, TCGCond cond,
543
                           TCGArg arg1, TCGArg arg2, int const_arg2,
544
                           int label_index, int small)
775
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
776
                             TCGArg arg1, TCGArg arg2, int const_arg2,
777
                             int label_index, int small)
545 778
{
546
    tcg_out_cmp(s, arg1, arg2, const_arg2);
779
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
547 780
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
548 781
}
549 782

  
783
#if TCG_TARGET_REG_BITS == 64
784
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
785
                             TCGArg arg1, TCGArg arg2, int const_arg2,
786
                             int label_index, int small)
787
{
788
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
789
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
790
}
791
#else
550 792
/* XXX: we implement it at the target level to avoid having to
551 793
   handle cross-basic-block temporaries */
552 794
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
......
556 798
    label_next = gen_new_label();
557 799
    switch(args[4]) {
558 800
    case TCG_COND_EQ:
559
        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
560
                       label_next, 1);
561
        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
562
                       args[5], small);
801
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
802
                         label_next, 1);
803
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
804
                         args[5], small);
563 805
        break;
564 806
    case TCG_COND_NE:
565
        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
566
                       args[5], small);
567
        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
568
                       args[5], small);
807
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
808
                         args[5], small);
809
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
810
                         args[5], small);
569 811
        break;
570 812
    case TCG_COND_LT:
571
        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
572
                       args[5], small);
813
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
814
                         args[5], small);
573 815
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
574
        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
575
                       args[5], small);
816
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
817
                         args[5], small);
576 818
        break;
577 819
    case TCG_COND_LE:
578
        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
579
                       args[5], small);
820
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
821
                         args[5], small);
580 822
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
581
        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
582
                       args[5], small);
823
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
824
                         args[5], small);
583 825
        break;
584 826
    case TCG_COND_GT:
585
        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
586
                       args[5], small);
827
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
828
                         args[5], small);
587 829
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
588
        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
589
                       args[5], small);
830
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
831
                         args[5], small);
590 832
        break;
591 833
    case TCG_COND_GE:
592
        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
593
                       args[5], small);
834
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
835
                         args[5], small);
594 836
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
595
        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
596
                       args[5], small);
837
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
838
                         args[5], small);
597 839
        break;
598 840
    case TCG_COND_LTU:
599
        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
600
                       args[5], small);
841
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
842
                         args[5], small);
601 843
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
602
        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
603
                       args[5], small);
844
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
845
                         args[5], small);
604 846
        break;
605 847
    case TCG_COND_LEU:
606
        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
607
                       args[5], small);
848
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
849
                         args[5], small);
608 850
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
609
        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
610
                       args[5], small);
851
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
852
                         args[5], small);
611 853
        break;
612 854
    case TCG_COND_GTU:
613
        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
614
                       args[5], small);
855
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
856
                         args[5], small);
615 857
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
616
        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
617
                       args[5], small);
858
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
859
                         args[5], small);
618 860
        break;
619 861
    case TCG_COND_GEU:
620
        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
621
                       args[5], small);
862
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
863
                         args[5], small);
622 864
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
623
        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
624
                       args[5], small);
865
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
866
                         args[5], small);
625 867
        break;
626 868
    default:
627 869
        tcg_abort();
628 870
    }
629 871
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
630 872
}
873
#endif
631 874

  
632
static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg dest,
633
                            TCGArg arg1, TCGArg arg2, int const_arg2)
875
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
876
                              TCGArg arg1, TCGArg arg2, int const_arg2)
634 877
{
635
    tcg_out_cmp(s, arg1, arg2, const_arg2);
878
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
636 879
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
637 880
    tcg_out_ext8u(s, dest, dest);
638 881
}
639 882

  
883
#if TCG_TARGET_REG_BITS == 64
884
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
885
                              TCGArg arg1, TCGArg arg2, int const_arg2)
886
{
887
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
888
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
889
    tcg_out_ext8u(s, dest, dest);
890
}
891
#else
640 892
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
641 893
                             const int *const_args)
642 894
{
......
678 930
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
679 931
    }
680 932
}
933
#endif
934

  
935
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
936
{
937
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
938

  
939
    if (disp == (int32_t)disp) {
940
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
941
        tcg_out32(s, disp);
942
    } else {
943
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
944
        tcg_out_modrm(s, OPC_GRP5,
945
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
946
    }
947
}
948

  
949
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
950
{
951
    tcg_out_branch(s, 1, dest);
952
}
681 953

  
682
static void tcg_out_calli(TCGContext *s, tcg_target_long dest)
954
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
683 955
{
684
    tcg_out_opc(s, OPC_CALL_Jz);
685
    tcg_out32(s, dest - (tcg_target_long)s->code_ptr - 4);
956
    tcg_out_branch(s, 0, dest);
686 957
}
687 958

  
688 959
#if defined(CONFIG_SOFTMMU)
......
718 989
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
719 990
   positions of the displacements of forward jumps to the TLB miss case.
720 991

  
721
   EAX is loaded with the low part of the address.  In the TLB hit case,
722
   it has been adjusted as indicated by the TLB and so is a host address.
723
   In the TLB miss case, it continues to hold a guest address.
992
   First argument register is loaded with the low part of the address.
993
   In the TLB hit case, it has been adjusted as indicated by the TLB
994
   and so is a host address.  In the TLB miss case, it continues to
995
   hold a guest address.
724 996

  
725
   EDX is clobbered.  */
997
   Second argument register is clobbered.  */
726 998

  
727 999
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
728 1000
                                    int mem_index, int s_bits,
......
730 1002
                                    uint8_t **label_ptr, int which)
731 1003
{
732 1004
    const int addrlo = args[addrlo_idx];
733
    const int r0 = TCG_REG_EAX;
734
    const int r1 = TCG_REG_EDX;
1005
    const int r0 = tcg_target_call_iarg_regs[0];
1006
    const int r1 = tcg_target_call_iarg_regs[1];
1007
    TCGType type = TCG_TYPE_I32;
1008
    int rexw = 0;
1009

  
1010
    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1011
        type = TCG_TYPE_I64;
1012
        rexw = P_REXW;
1013
    }
735 1014

  
736
    tcg_out_mov(s, TCG_TYPE_I32, r1, addrlo);
737
    tcg_out_mov(s, TCG_TYPE_I32, r0, addrlo);
1015
    tcg_out_mov(s, type, r1, addrlo);
1016
    tcg_out_mov(s, type, r0, addrlo);
738 1017

  
739
    tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1018
    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
1019
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
740 1020

  
741
    tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
742
    tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1021
    tgen_arithi(s, ARITH_AND + rexw, r0,
1022
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1023
    tgen_arithi(s, ARITH_AND + rexw, r1,
1024
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
743 1025

  
744
    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
1026
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
745 1027
                             offsetof(CPUState, tlb_table[mem_index][0])
746 1028
                             + which);
747 1029

  
748 1030
    /* cmp 0(r1), r0 */
749
    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
1031
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
750 1032

  
751
    tcg_out_mov(s, TCG_TYPE_I32, r0, addrlo);
1033
    tcg_out_mov(s, type, r0, addrlo);
752 1034

  
753 1035
    /* jne label1 */
754 1036
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
755 1037
    label_ptr[0] = s->code_ptr;
756 1038
    s->code_ptr++;
757 1039

  
758
    if (TARGET_LONG_BITS == 64) {
1040
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
759 1041
        /* cmp 4(r1), addrhi */
760 1042
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
761 1043

  
......
768 1050
    /* TLB Hit.  */
769 1051

  
770 1052
    /* add addend(r1), r0 */
771
    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
1053
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
772 1054
                         offsetof(CPUTLBEntry, addend) - which);
773 1055
}
774 1056
#endif
......
783 1065
#endif
784 1066
    switch (sizeop) {
785 1067
    case 0:
786
        /* movzbl */
787 1068
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
788 1069
        break;
789 1070
    case 0 | 4:
790
        /* movsbl */
791
        tcg_out_modrm_offset(s, OPC_MOVSBL, datalo, base, ofs);
1071
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
792 1072
        break;
793 1073
    case 1:
794
        /* movzwl */
795 1074
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
796 1075
        if (bswap) {
797 1076
            tcg_out_rolw_8(s, datalo);
798 1077
        }
799 1078
        break;
800 1079
    case 1 | 4:
801
        /* movswl */
802
        tcg_out_modrm_offset(s, OPC_MOVSWL, datalo, base, ofs);
803 1080
        if (bswap) {
1081
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
804 1082
            tcg_out_rolw_8(s, datalo);
805
            tcg_out_modrm(s, OPC_MOVSWL, datalo, datalo);
1083
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1084
        } else {
1085
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
806 1086
        }
807 1087
        break;
808 1088
    case 2:
......
811 1091
            tcg_out_bswap32(s, datalo);
812 1092
        }
813 1093
        break;
814
    case 3:
1094
#if TCG_TARGET_REG_BITS == 64
1095
    case 2 | 4:
815 1096
        if (bswap) {
816
            int t = datalo;
817
            datalo = datahi;
818
            datahi = t;
819
        }
820
        if (base != datalo) {
821 1097
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
822
            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1098
            tcg_out_bswap32(s, datalo);
1099
            tcg_out_ext32s(s, datalo, datalo);
823 1100
        } else {
824
            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
825
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1101
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
826 1102
        }
827
        if (bswap) {
828
            tcg_out_bswap32(s, datalo);
829
            tcg_out_bswap32(s, datahi);
1103
        break;
1104
#endif
1105
    case 3:
1106
        if (TCG_TARGET_REG_BITS == 64) {
1107
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
1108
            if (bswap) {
1109
                tcg_out_bswap64(s, datalo);
1110
            }
1111
        } else {
1112
            if (bswap) {
1113
                int t = datalo;
1114
                datalo = datahi;
1115
                datahi = t;
1116
            }
1117
            if (base != datalo) {
1118
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1119
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1120
            } else {
1121
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1122
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1123
            }
1124
            if (bswap) {
1125
                tcg_out_bswap32(s, datalo);
1126
                tcg_out_bswap32(s, datahi);
1127
            }
830 1128
        }
831 1129
        break;
832 1130
    default:
......
849 1147

  
850 1148
    data_reg = args[0];
851 1149
    addrlo_idx = 1;
852
    if (opc == 3) {
1150
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
853 1151
        data_reg2 = args[1];
854 1152
        addrlo_idx = 2;
855 1153
    }
856 1154

  
857 1155
#if defined(CONFIG_SOFTMMU)
858
    mem_index = args[addrlo_idx + (TARGET_LONG_BITS / 32)];
1156
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
859 1157
    s_bits = opc & 3;
860 1158

  
861 1159
    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
862 1160
                     label_ptr, offsetof(CPUTLBEntry, addr_read));
863 1161

  
864 1162
    /* TLB Hit.  */
865
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_EAX, 0, opc);
1163
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
1164
                           tcg_target_call_iarg_regs[0], 0, opc);
866 1165

  
867 1166
    /* jmp label2 */
868 1167
    tcg_out8(s, OPC_JMP_short);
......
873 1172

  
874 1173
    /* label1: */
875 1174
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
876
    if (TARGET_LONG_BITS == 64) {
1175
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
877 1176
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
878 1177
    }
879 1178

  
880 1179
    /* XXX: move that code at the end of the TB */
881
    /* EAX is already loaded.  */
1180
    /* The first argument is already loaded with addrlo.  */
882 1181
    arg_idx = 1;
883
    if (TARGET_LONG_BITS == 64) {
1182
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
884 1183
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
885 1184
                    args[addrlo_idx + 1]);
886 1185
    }
......
890 1189

  
891 1190
    switch(opc) {
892 1191
    case 0 | 4:
893
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX);
1192
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
894 1193
        break;
895 1194
    case 1 | 4:
896
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX);
1195
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
897 1196
        break;
898 1197
    case 0:
899 1198
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
......
902 1201
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
903 1202
        break;
904 1203
    case 2:
905
    default:
906 1204
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
907 1205
        break;
1206
#if TCG_TARGET_REG_BITS == 64
1207
    case 2 | 4:
1208
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1209
        break;
1210
#endif
908 1211
    case 3:
909
        if (data_reg == TCG_REG_EDX) {
1212
        if (TCG_TARGET_REG_BITS == 64) {
1213
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1214
        } else if (data_reg == TCG_REG_EDX) {
910 1215
            /* xchg %edx, %eax */
911
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX);
1216
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
912 1217
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
913 1218
        } else {
914 1219
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
915 1220
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
916 1221
        }
917 1222
        break;
1223
    default:
1224
        tcg_abort();
918 1225
    }
919 1226

  
920 1227
    /* label2: */
921 1228
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
922 1229
#else
923
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
924
                           args[addrlo_idx], GUEST_BASE, opc);
1230
    {
1231
        int32_t offset = GUEST_BASE;
1232
        int base = args[addrlo_idx];
1233

  
1234
        if (TCG_TARGET_REG_BITS == 64) {
1235
            /* ??? We assume all operations have left us with register
1236
               contents that are zero extended.  So far this appears to
1237
               be true.  If we want to enforce this, we can either do
1238
               an explicit zero-extension here, or (if GUEST_BASE == 0)
1239
               use the ADDR32 prefix.  For now, do nothing.  */
1240

  
1241
            if (offset != GUEST_BASE) {
1242
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1243
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1244
                base = TCG_REG_RDI, offset = 0;
1245
            }
1246
        }
1247

  
1248
        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
1249
    }
925 1250
#endif
926 1251
}
927 1252

  
......
936 1261
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
937 1262
       we could perform the bswap twice to restore the original value
938 1263
       instead of moving to the scratch.  But as it is, the L constraint
939
       means that EDX is definitely free here.  */
940
    int scratch = TCG_REG_EDX;
1264
       means that the second argument reg is definitely free here.  */
1265
    int scratch = tcg_target_call_iarg_regs[1];
941 1266

  
942 1267
    switch (sizeop) {
943 1268
    case 0:
944
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv, datalo, base, ofs);
1269
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
945 1270
        break;
946 1271
    case 1:
947 1272
        if (bswap) {
......
949 1274
            tcg_out_rolw_8(s, scratch);
950 1275
            datalo = scratch;
951 1276
        }
952
        /* movw */
953
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
954
                             datalo, base, ofs);
1277
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
955 1278
        break;
956 1279
    case 2:
957 1280
        if (bswap) {
......
962 1285
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
963 1286
        break;
964 1287
    case 3:
965
        if (bswap) {
1288
        if (TCG_TARGET_REG_BITS == 64) {
1289
            if (bswap) {
1290
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1291
                tcg_out_bswap64(s, scratch);
1292
                datalo = scratch;
1293
            }
1294
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
1295
        } else if (bswap) {
966 1296
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
967 1297
            tcg_out_bswap32(s, scratch);
968 1298
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
......
992 1322

  
993 1323
    data_reg = args[0];
994 1324
    addrlo_idx = 1;
995
    if (opc == 3) {
1325
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
996 1326
        data_reg2 = args[1];
997 1327
        addrlo_idx = 2;
998 1328
    }
999 1329

  
1000 1330
#if defined(CONFIG_SOFTMMU)
1001
    mem_index = args[addrlo_idx + (TARGET_LONG_BITS / 32)];
1331
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1002 1332
    s_bits = opc;
1003 1333

  
1004 1334
    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1005 1335
                     label_ptr, offsetof(CPUTLBEntry, addr_write));
1006 1336

  
1007 1337
    /* TLB Hit.  */
1008
    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_EAX, 0, opc);
1338
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
1339
                           tcg_target_call_iarg_regs[0], 0, opc);
1009 1340

  
1010 1341
    /* jmp label2 */
1011 1342
    tcg_out8(s, OPC_JMP_short);
......
1016 1347

  
1017 1348
    /* label1: */
1018 1349
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1019
    if (TARGET_LONG_BITS == 64) {
1350
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1020 1351
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1021 1352
    }
1022 1353

  
1023 1354
    /* XXX: move that code at the end of the TB */
1024
    if (TARGET_LONG_BITS == 32) {
1355
    if (TCG_TARGET_REG_BITS == 64) {
1356
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1357
                    TCG_REG_RSI, data_reg);
1358
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
1359
        stack_adjust = 0;
1360
    } else if (TARGET_LONG_BITS == 32) {
1025 1361
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
1026 1362
        if (opc == 3) {
1027 1363
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
......
1058 1394

  
1059 1395
    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
1060 1396

  
1061
    if (stack_adjust == 4) {
1397
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1062 1398
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
1063 1399
        tcg_out_pop(s, TCG_REG_ECX);
1064 1400
    } else if (stack_adjust != 0) {
......
1068 1404
    /* label2: */
1069 1405
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1070 1406
#else
1071
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
1072
                           args[addrlo_idx], GUEST_BASE, opc);
1407
    {
1408
        int32_t offset = GUEST_BASE;
1409
        int base = args[addrlo_idx];
1410

  
1411
        if (TCG_TARGET_REG_BITS == 64) {
1412
            /* ??? We assume all operations have left us with register
1413
               contents that are zero extended.  So far this appears to
1414
               be true.  If we want to enforce this, we can either do
1415
               an explicit zero-extension here, or (if GUEST_BASE == 0)
1416
               use the ADDR32 prefix.  For now, do nothing.  */
1417

  
1418
            if (offset != GUEST_BASE) {
1419
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1420
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1421
                base = TCG_REG_RDI, offset = 0;
1422
            }
1423
        }
1424

  
1425
        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
1426
    }
1073 1427
#endif
1074 1428
}
1075 1429

  
1076 1430
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1077 1431
                              const TCGArg *args, const int *const_args)
1078 1432
{
1079
    int c;
1433
    int c, rexw = 0;
1434

  
1435
#if TCG_TARGET_REG_BITS == 64
1436
# define OP_32_64(x) \
1437
        case glue(glue(INDEX_op_, x), _i64): \
1438
            rexw = P_REXW; /* FALLTHRU */    \
1439
        case glue(glue(INDEX_op_, x), _i32)
1440
#else
1441
# define OP_32_64(x) \
1442
        case glue(glue(INDEX_op_, x), _i32)
1443
#endif
1080 1444

  
1081 1445
    switch(opc) {
1082 1446
    case INDEX_op_exit_tb:
1083
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]);
1084
        tcg_out8(s, OPC_JMP_long); /* jmp tb_ret_addr */
1085
        tcg_out32(s, tb_ret_addr - s->code_ptr - 4);
1447
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1448
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1086 1449
        break;
1087 1450
    case INDEX_op_goto_tb:
1088 1451
        if (s->tb_jmp_offset) {
......
1107 1470
        break;
1108 1471
    case INDEX_op_jmp:
1109 1472
        if (const_args[0]) {
1110
            tcg_out8(s, OPC_JMP_long);
1111
            tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
1473
            tcg_out_jmp(s, args[0]);
1112 1474
        } else {
1113 1475
            /* jmp *reg */
1114 1476
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
......
1120 1482
    case INDEX_op_movi_i32:
1121 1483
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1122 1484
        break;
1123
    case INDEX_op_ld8u_i32:
1124
        /* movzbl */
1485
    OP_32_64(ld8u):
1486
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1125 1487
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1126 1488
        break;
1127
    case INDEX_op_ld8s_i32:
1128
        /* movsbl */
1129
        tcg_out_modrm_offset(s, OPC_MOVSBL, args[0], args[1], args[2]);
1489
    OP_32_64(ld8s):
1490
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1130 1491
        break;
1131
    case INDEX_op_ld16u_i32:
1132
        /* movzwl */
1492
    OP_32_64(ld16u):
1493
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1133 1494
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1134 1495
        break;
1135
    case INDEX_op_ld16s_i32:
1136
        /* movswl */
1137
        tcg_out_modrm_offset(s, OPC_MOVSWL, args[0], args[1], args[2]);
1496
    OP_32_64(ld16s):
1497
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1138 1498
        break;
1499
#if TCG_TARGET_REG_BITS == 64
1500
    case INDEX_op_ld32u_i64:
1501
#endif
1139 1502
    case INDEX_op_ld_i32:
1140 1503
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1141 1504
        break;
1142
    case INDEX_op_st8_i32:
1143
        /* movb */
1144
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv, args[0], args[1], args[2]);
1505

  
1506
    OP_32_64(st8):
1507
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1508
                             args[0], args[1], args[2]);
1145 1509
        break;
1146
    case INDEX_op_st16_i32:
1147
        /* movw */
1510
    OP_32_64(st16):
1148 1511
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1149 1512
                             args[0], args[1], args[2]);
1150 1513
        break;
1514
#if TCG_TARGET_REG_BITS == 64
1515
    case INDEX_op_st32_i64:
1516
#endif
1151 1517
    case INDEX_op_st_i32:
1152 1518
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1153 1519
        break;
1154
    case INDEX_op_add_i32:
1520

  
1521
    OP_32_64(add):
1155 1522
        /* For 3-operand addition, use LEA.  */
1156 1523
        if (args[0] != args[1]) {
1157 1524
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
......
1161 1528
            } else if (a0 == a2) {
1162 1529
                /* Watch out for dest = src + dest, since we've removed
1163 1530
                   the matching constraint on the add.  */
1164
                tgen_arithr(s, ARITH_ADD, a0, a1);
1531
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1165 1532
                break;
1166 1533
            }
1167 1534

  
1168
            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
1535
            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1169 1536
            break;
1170 1537
        }
1171 1538
        c = ARITH_ADD;
1172 1539
        goto gen_arith;
1173
    case INDEX_op_sub_i32:
1540
    OP_32_64(sub):
1174 1541
        c = ARITH_SUB;
1175 1542
        goto gen_arith;
1176
    case INDEX_op_and_i32:
1543
    OP_32_64(and):
1177 1544
        c = ARITH_AND;
1178 1545
        goto gen_arith;
1179
    case INDEX_op_or_i32:
1546
    OP_32_64(or):
1180 1547
        c = ARITH_OR;
1181 1548
        goto gen_arith;
1182
    case INDEX_op_xor_i32:
1549
    OP_32_64(xor):
1183 1550
        c = ARITH_XOR;
1184 1551
        goto gen_arith;
1185 1552
    gen_arith:
1186 1553
        if (const_args[2]) {
1187
            tgen_arithi(s, c, args[0], args[2], 0);
1554
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
1188 1555
        } else {
1189
            tgen_arithr(s, c, args[0], args[2]);
1556
            tgen_arithr(s, c + rexw, args[0], args[2]);
1190 1557
        }
1191 1558
        break;
1192
    case INDEX_op_mul_i32:
1559

  
1560
    OP_32_64(mul):
1193 1561
        if (const_args[2]) {
1194 1562
            int32_t val;
1195 1563
            val = args[2];
1196 1564
            if (val == (int8_t)val) {
1197
                tcg_out_modrm(s, OPC_IMUL_GvEvIb, args[0], args[0]);
1565
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1198 1566
                tcg_out8(s, val);
1199 1567
            } else {
1200
                tcg_out_modrm(s, OPC_IMUL_GvEvIz, args[0], args[0]);
1568
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1201 1569
                tcg_out32(s, val);
1202 1570
            }
1203 1571
        } else {
1204
            tcg_out_modrm(s, OPC_IMUL_GvEv, args[0], args[2]);
1572
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1205 1573
        }
1206 1574
        break;
1207
    case INDEX_op_mulu2_i32:
1208
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1209
        break;
1210
    case INDEX_op_div2_i32:
1211
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_IDIV, args[4]);
1575

  
1576
    OP_32_64(div2):
1577
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1212 1578
        break;
1213
    case INDEX_op_divu2_i32:
1214
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_DIV, args[4]);
1579
    OP_32_64(divu2):
1580
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1215 1581
        break;
1216
    case INDEX_op_shl_i32:
1582

  
1583
    OP_32_64(shl):
1217 1584
        c = SHIFT_SHL;
1218
    gen_shift32:
1219
        if (const_args[2]) {
1220
            tcg_out_shifti(s, c, args[0], args[2]);
1221
        } else {
1222
            tcg_out_modrm(s, OPC_SHIFT_cl, c, args[0]);
1223
        }
1224
        break;
1225
    case INDEX_op_shr_i32:
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff