/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (the GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with the L constraint, which are the first argument
   registers on x86_64, and two arbitrary call-clobbered registers on
   i386.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov;
#else
# define have_cmov 0
#endif

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
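
/* Illustrative note: tcg_out_jxx below records a R_386_PC32 reloc at the
   4-byte displacement field of a long jump with addend -4 and then skips
   the field.  On resolution, patch_reloc computes
   value = label + (-4) - field, i.e. the target relative to the end of
   the displacement field, which is exactly the rel32 the CPU expects.  */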

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
#if TCG_TARGET_REG_BITS == 64
        tcg_regset_set32(ct->u.regs, 0, 0xffff);
#else
        tcg_regset_set32(ct->u.regs, 0, 0xff);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_DATA16        0x200           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x400           /* 0x67 opcode prefix */
# define P_REXW         0x800           /* Set REX.W = 1 */
# define P_REXB_R       0x1000          /* REG field as byte register */
# define P_REXB_RM      0x2000          /* R/M field as byte register */
# define P_GS           0x4000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
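
/* These flags live above the low opcode byte, so an "opcode" such as
   OPC_MOVZWL (0xb7 | P_EXT) is really 0x1b7: tcg_out_opc below peels the
   P_* bits off again, emitting 0f b7 plus whatever 66/67/REX prefix
   bytes the flags request.  */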

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
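
/* For example, TCG_COND_NE maps to JCC_JNE (0x5): added to OPC_JCC_short
   this yields the one-byte opcode 0x75 (jne rel8); added to OPC_JCC_long
   it yields 0f 85 (jne rel32).  */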

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
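
/* For example, tcg_out_modrm(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_ECX)
   emits 8b c1, i.e. "movl %ecx, %eax"; adding P_REXW on a 64-bit host
   gives the REX.W form 48 8b c1.  */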

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
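
/* For example, a zero-offset load through %ebp cannot use the mod=0 form
   (that encoding means absolute addressing), so
   tcg_out_modrm_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_EBP, 0)
   falls into the disp8 case and emits 8b 45 00.  */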

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        /* Emit the high 32 bits; ">> 31 >> 1" avoids an undefined
           shift by 32 when tcg_target_long is only 32 bits wide.  */
        tcg_out32(s, arg >> 31 >> 1);
    }
}
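
/* Illustrative encodings: tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RAX, 0)
   becomes "xor %eax,%eax" (33 c0), and a value with no high bits set,
   e.g. 0x12345678, becomes the short "mov $imm32,%eax" form
   (b8 78 56 34 12), relying on the implicit zero-extension to 64 bits.  */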

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
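
/* Illustrative special case: tgen_arithi(s, ARITH_AND, TCG_REG_EAX, 0xff, 0)
   emits no AND at all; it hits the mask shortcuts above and produces
   "movzbl %al,%eax" (0f b6 c0) instead.  */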

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
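
/* Sizing note: the -2, -5 and -6 adjustments above are the lengths of the
   short jmp/jcc (2 bytes), long jmp (5 bytes) and long jcc (6 bytes),
   converting a displacement measured from the start of the branch into
   the end-relative displacement the CPU uses.  */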

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* The rel32 displacement is out of range; load the absolute
           address into a scratch register and go indirect.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}
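
/* For example, a direct call within +/-2GB of the code buffer is the
   5-byte "e8 rel32" form; otherwise tcg_out_branch falls back to a
   movabs into %r10 followed by "call *%r10" (41 ff d2).  */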

#if defined(CONFIG_SOFTMMU)

#include "exec/softmmu_defs.h"

/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void *qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void *qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};

static void add_qemu_ldst_label(TCGContext *s,
                                int is_ld,
                                int opc,
                                int data_reg,
                                int data_reg2,
                                int addrlo_reg,
                                int addrhi_reg,
                                int mem_index,
                                uint8_t *raddr,
                                uint8_t **label_ptr);

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTR is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   The second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   The first argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = TCG_REG_L0;
    const int r1 = TCG_REG_L1;
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r0, addrlo);
    tcg_out_mov(s, type, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);

    tcg_out_mov(s, type, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
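
/* Roughly, the sequence emitted above is (sketch, 64-bit case):

       mov  addrlo, r0
       mov  addrlo, r1
       shr  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r0
       and  $(page_mask | access_size_mask), r1
       and  $(tlb_index_mask), r0
       lea  tlb_table_offset(env, r0), r0
       cmp  0(r0), r1
       mov  addrlo, r1
       jne  slow_path
       add  addend_offset(r0), r1       # r1 is now a host address
*/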
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int seg,
                                   int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
                                 datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                                 datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
            } else {
                /* The base register overlaps the low data register; load
                   the high word first so the base isn't clobbered early.  */
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
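
/* SIZEOP encoding, as used throughout: bits 0-1 hold the log2 of the
   access size (0 = byte .. 3 = quad) and bit 2 requests sign extension,
   so "0 | 4" above is a sign-extending byte load.  */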

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed registers globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    uint8_t *label_ptr[2];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into the ldst label.  */
    add_qemu_ldst_label(s,
                        1,
                        opc,
                        data_reg,
                        data_reg2,
                        args[addrlo_idx],
                        args[addrlo_idx + 1],
                        mem_index,
                        s->code_ptr,
                        label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            /* GUEST_BASE did not fit in the int32_t offset; add it
               through a scratch register instead.  */
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int seg,
                                   int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const int scratch = TCG_REG_L0;

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
                             datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
                                 datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    uint8_t *label_ptr[2];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into the ldst label.  */
    add_qemu_ldst_label(s,
                        0,
                        opc,
                        data_reg,
                        data_reg2,
                        args[addrlo_idx],
                        args[addrlo_idx + 1],
                        mem_index,
                        s->code_ptr,
                        label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
    }
#endif
}

#if defined(CONFIG_SOFTMMU)
/*
 * Record the context of a call to the out-of-line helper code for the slow
 * path of a load or store, so that we can later generate the correct
 * helper code.
 */
static void add_qemu_ldst_label(TCGContext *s,
                                int is_ld,
                                int opc,
                                int data_reg,
                                int data_reg2,
                                int addrlo_reg,
                                int addrhi_reg,
                                int mem_index,
                                uint8_t *raddr,
                                uint8_t **label_ptr)
{
    int idx;
    TCGLabelQemuLdst *label;

    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
        tcg_abort();
    }

    idx = s->nb_qemu_ldst_labels++;
    label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = data_reg;
    label->datahi_reg = data_reg2;
    label->addrlo_reg = addrlo_reg;
    label->addrhi_reg = addrhi_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}
1448

    
1449
/*
1450
 * Generate code for the slow path for a load at the end of block
1451
 */
1452
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
1453
{
1454
    int s_bits;
1455
    int opc = label->opc;
1456
    int mem_index = label->mem_index;
1457
#if TCG_TARGET_REG_BITS == 32
1458
    int stack_adjust;
1459
    int addrlo_reg = label->addrlo_reg;
1460
    int addrhi_reg = label->addrhi_reg;
1461
#endif
1462
    int data_reg = label->datalo_reg;
1463
    int data_reg2 = label->datahi_reg;
1464
    uint8_t *raddr = label->raddr;
1465
    uint8_t **label_ptr = &label->label_ptr[0];
1466

    
1467
    s_bits = opc & 3;
1468

    
1469
    /* resolve label address */
1470
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1471
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1472
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1473
    }
1474

    
1475
#if TCG_TARGET_REG_BITS == 32
1476
    tcg_out_pushi(s, mem_index);
1477
    stack_adjust = 4;
1478
    if (TARGET_LONG_BITS == 64) {
1479
        tcg_out_push(s, addrhi_reg);
1480
        stack_adjust += 4;
1481
    }
1482
    tcg_out_push(s, addrlo_reg);
1483
    stack_adjust += 4;
1484
    tcg_out_push(s, TCG_AREG0);
1485
    stack_adjust += 4;
1486
#else
1487
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
1488
    /* The second argument is already loaded with addrlo.  */
1489
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
1490
#endif
1491

    
1492
    /* Code generation of qemu_ld/st's slow path calling MMU helper
1493

1494
       PRE_PROC ...
1495
       call MMU helper
1496
       jmp POST_PROC (2b) : short forward jump <- GETRA()
1497
       jmp next_code (5b) : dummy long backward jump which is never executed
1498
       POST_PROC ... : do post-processing <- GETRA() + 7
1499
       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
1500
    */
1501

    
1502
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1503

    
1504
    /* Jump to post-processing code */
1505
    tcg_out8(s, OPC_JMP_short);
1506
    tcg_out8(s, 5);
1507
    /* Dummy backward jump having information of fast path'pc for MMU helpers */
1508
    tcg_out8(s, OPC_JMP_long);
1509
    *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
1510
    s->code_ptr += 4;
1511

    
1512
#if TCG_TARGET_REG_BITS == 32
1513
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1514
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
1515
        tcg_out_pop(s, TCG_REG_ECX);
1516
    } else if (stack_adjust != 0) {
1517
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
1518
    }
1519
#endif
    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to next IR of qemu_ld */
    tcg_out_jmp(s, (tcg_target_long)raddr);
}

/*
 * Generate code for the slow path for a store at the end of the block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
{
    int s_bits;
    int stack_adjust;
    int opc = label->opc;
    int mem_index = label->mem_index;
    int data_reg = label->datalo_reg;
#if TCG_TARGET_REG_BITS == 32
    int data_reg2 = label->datahi_reg;
    int addrlo_reg = label->addrlo_reg;
    int addrhi_reg = label->addrhi_reg;
#endif
    uint8_t *raddr = label->raddr;
    uint8_t **label_ptr = &label->label_ptr[0];

    s_bits = opc & 3;

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (opc == 3) {
        tcg_out_push(s, data_reg2);
        stack_adjust += 4;
    }
    tcg_out_push(s, data_reg);
    stack_adjust += 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, addrhi_reg);
        stack_adjust += 4;
    }
    tcg_out_push(s, addrlo_reg);
    stack_adjust += 4;
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#else
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
    /* The second argument is already loaded with addrlo.  */
    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                tcg_target_call_iarg_regs[2], data_reg);
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
    stack_adjust = 0;
#endif
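
    /* The store helper takes (env, addr, data, mem_index).  On a 64-bit
       host the data value travels in the third argument register, sized
       according to opc; nothing was pushed, hence stack_adjust = 0.  */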

    /* Code generation for the qemu_ld/st slow path, which calls the MMU
       helper:

       PRE_PROC ...
       call MMU helper
       jmp POST_PROC (2b) : short forward jump <- GETRA()
       jmp next_code (5b) : dummy long backward jump which is never executed
       POST_PROC ... : do post-processing <- GETRA() + 7
       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
    */

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    /* Jump to post-processing code */
    tcg_out8(s, OPC_JMP_short);
    tcg_out8(s, 5);
    /* Dummy backward jump carrying the fast path's pc for the MMU helpers */
    tcg_out8(s, OPC_JMP_long);
    *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
    s->code_ptr += 4;

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* Jump to the code corresponding to next IR of qemu_st */
    tcg_out_jmp(s, (tcg_target_long)raddr);
}
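
/* Each label below was queued by a qemu_ld/st fast path when it emitted
   its TLB-miss branch; emitting all slow paths together at the end of the
   TB keeps the hot code straight-line.  */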

/*
 * Generate TB finalization at the end of the block
 */
void tcg_out_tb_finalize(TCGContext *s)
{
    int i;
    TCGLabelQemuLdst *label;

    /* qemu_ld/st slow paths */
    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
        label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[i];
        if (label->is_ld) {
            tcg_out_qemu_ld_slow_path(s, label);
        } else {
            tcg_out_qemu_st_slow_path(s, label);
        }
    }
}
#endif  /* CONFIG_SOFTMMU */

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
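
    /* For example, on a 64-bit host OP_32_64(add) expands to
           case INDEX_op_add_i64: rexw = P_REXW; case INDEX_op_add_i32:
       so a single emitter body handles both operand widths, with rexw
       selecting the REX.W prefix for the 64-bit form.  */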

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
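    /* goto_tb has two strategies: with tb_jmp_offset set, a direct jmp
       rel32 is emitted and its offset recorded so the displacement can be
       patched when the destination TB is linked; otherwise an indirect
       jump loads its target from the tb_next[] slot.  */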
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

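    /* add is the only arithmetic op here without a matching-operand
       constraint (its op_def is { "r", "r", "ri" }): when dest != src1,
       LEA encodes the three-operand form dest = src1 + src2 (or
       src1 + immediate) in a single instruction.  */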
    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;
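
    /* IDIV/DIV implicitly divide EDX:EAX by their operand, leaving the
       quotient in EAX and the remainder in EDX; the "a"/"d" constraints on
       div2/divu2 in x86_op_defs pin the TCG operands to those registers.  */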

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

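    /* One-operand MUL multiplies EAX by its operand and leaves the 64-bit
       product in EDX:EAX, matching mulu2's "a"/"d" output constraints in
       x86_op_defs.  */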
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
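
    /* Double-word add/sub: the low halves combine with ADD/SUB and the
       high halves consume the carry/borrow via ADC/SBB.  */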
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif
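
    /* Only deposits that map onto x86 partial-register stores are accepted
       (see the "Q" constraints): bits 0..7 via a byte move, bits 8..15 via
       the high-byte registers (args[0] + 4 encodes AH/CH/DH/BH), and bits
       0..15 via a 16-bit move.  */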
    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}

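/* Constraint letters used below: "0"/"1" tie an input to the register of
   the corresponding output; "a", "c" and "d" pin an operand to EAX, ECX
   and EDX; "q" requires a byte-addressable register and "Q" one of
   EAX/EBX/ECX/EDX (the registers with a high-byte form); on 64-bit hosts
   "e" and "Z" accept constants that fit in a sign- or zero-extended
   32-bit immediate.  */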
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
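
/* Worked example, assuming the usual TCG_STATIC_CALL_ARGS_SIZE of 128 and
   CPU_TEMP_BUF_NLONGS of 128: on a non-Windows x86_64 host, six callee-saved
   registers plus the return address give PUSH_SIZE = 7 * 8 = 56 bytes, and
   FRAME_SIZE rounds 56 + 128 + 1024 = 1208 up to 1216 for 16-byte stack
   alignment.  */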

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif
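
    /* The prologue is entered as a function call with (env, tb_ptr): in
       registers on a 64-bit host, on the stack on a 32-bit host, which is
       why the 32-bit path above reloads both from ESP-relative slots
       around the frame adjustment.  */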

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE.  */
    if (GUEST_BASE) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
    /* For 32-bit, 99% certainty that we're running on hardware that supports
       cmov, but we still need to check.  In case cmov is not available, we'll
       use a small forward branch.  */
#ifndef have_cmov
    {
        unsigned a, b, c, d;
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
    }
#endif

#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    tcg_target_long func_start __attribute__((packed));
    tcg_target_long func_len __attribute__((packed));
    uint8_t def_cfa[4];
    uint8_t reg_ofs[14];
} DebugFrameFDE;

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDE fde;
} DebugFrame;
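
/* These structures hand-assemble a minimal DWARF .debug_frame image (one
   CIE and one FDE) that tcg_register_jit_int below passes to the in-process
   GDB JIT interface, so a debugger can unwind through generated code.  */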

#if !defined(__ELF__)
    /* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
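
/* Encoding notes: .cie.data_align is a one-byte sleb128 (0x78 has bit 6
   set, so it decodes to -8).  Each .fde.reg_ofs pair is a DW_CFA_offset
   instruction, 0x80 | dwarf-register followed by the offset from the CFA
   as a uleb128 count of data_align units: e.g. 0x86, 2 places %rbp at
   CFA - 16.  */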
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* We're expecting a 2 byte uleb128 encoded value.  */
    assert(FRAME_SIZE >> 14 == 0);
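    /* def_cfa above encodes FRAME_SIZE as exactly two uleb128 bytes (7 bits
       each), which caps it at 2^14 - 1; the assertion guards that.  */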

    debug_frame.fde.func_start = (tcg_target_long) buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif