Statistics
| Branch: | Revision:

root / tcg / i386 / tcg-target.c @ 5d8a4f8f

History | View | Annotate | Download (60.4 kB)

1
/*
2
 * Tiny Code Generator for QEMU
3
 *
4
 * Copyright (c) 2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
#ifndef NDEBUG
26
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27
#if TCG_TARGET_REG_BITS == 64
28
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30
#else
31
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32
#endif
33
};
34
#endif
35

    
36
static const int tcg_target_reg_alloc_order[] = {
37
#if TCG_TARGET_REG_BITS == 64
38
    TCG_REG_RBP,
39
    TCG_REG_RBX,
40
    TCG_REG_R12,
41
    TCG_REG_R13,
42
    TCG_REG_R14,
43
    TCG_REG_R15,
44
    TCG_REG_R10,
45
    TCG_REG_R11,
46
    TCG_REG_R9,
47
    TCG_REG_R8,
48
    TCG_REG_RCX,
49
    TCG_REG_RDX,
50
    TCG_REG_RSI,
51
    TCG_REG_RDI,
52
    TCG_REG_RAX,
53
#else
54
    TCG_REG_EBX,
55
    TCG_REG_ESI,
56
    TCG_REG_EDI,
57
    TCG_REG_EBP,
58
    TCG_REG_ECX,
59
    TCG_REG_EDX,
60
    TCG_REG_EAX,
61
#endif
62
};
63

    
64
static const int tcg_target_call_iarg_regs[] = {
65
#if TCG_TARGET_REG_BITS == 64
66
    TCG_REG_RDI,
67
    TCG_REG_RSI,
68
    TCG_REG_RDX,
69
    TCG_REG_RCX,
70
    TCG_REG_R8,
71
    TCG_REG_R9,
72
#else
73
    TCG_REG_EAX,
74
    TCG_REG_EDX,
75
    TCG_REG_ECX
76
#endif
77
};
78

    
79
static const int tcg_target_call_oarg_regs[2] = {
80
    TCG_REG_EAX,
81
    TCG_REG_EDX
82
};
83

    
84
/* Pointer into generated code.  It is neither assigned nor read in this
   part of the file.
   NOTE(review): presumably the address jumped to when leaving a translation
   block -- confirm where it is set.  */
static uint8_t *tb_ret_addr;
85

    
86
/* Resolve one relocation at CODE_PTR.

   TYPE is R_386_PC32 or R_386_PC8; any other type aborts.  The value
   stored is VALUE + ADDEND minus the address of the patched field, and it
   must fit the field (32 or 8 signed bits), otherwise we abort.  */
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    tcg_target_long disp = value + addend;

    if (type == R_386_PC32) {
        disp -= (uintptr_t)code_ptr;
        if ((int32_t)disp != disp) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = disp;
    } else if (type == R_386_PC8) {
        disp -= (uintptr_t)code_ptr;
        if ((int8_t)disp != disp) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = disp;
    } else {
        tcg_abort();
    }
}
109

    
110
/* maximum number of register used for input function arguments */
111
static inline int tcg_target_get_call_iarg_regs_count(int flags)
112
{
113
    if (TCG_TARGET_REG_BITS == 64) {
114
        return 6;
115
    }
116

    
117
    flags &= TCG_CALL_TYPE_MASK;
118
    switch(flags) {
119
    case TCG_CALL_TYPE_STD:
120
        return 0;
121
    case TCG_CALL_TYPE_REGPARM_1:
122
    case TCG_CALL_TYPE_REGPARM_2:
123
    case TCG_CALL_TYPE_REGPARM:
124
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
125
    default:
126
        tcg_abort();
127
    }
128
}
129

    
130
/* parse target specific constraints */
131
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
132
{
133
    const char *ct_str;
134

    
135
    ct_str = *pct_str;
136
    switch(ct_str[0]) {
137
    case 'a':
138
        ct->ct |= TCG_CT_REG;
139
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
140
        break;
141
    case 'b':
142
        ct->ct |= TCG_CT_REG;
143
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
144
        break;
145
    case 'c':
146
        ct->ct |= TCG_CT_REG;
147
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
148
        break;
149
    case 'd':
150
        ct->ct |= TCG_CT_REG;
151
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
152
        break;
153
    case 'S':
154
        ct->ct |= TCG_CT_REG;
155
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
156
        break;
157
    case 'D':
158
        ct->ct |= TCG_CT_REG;
159
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
160
        break;
161
    case 'q':
162
        ct->ct |= TCG_CT_REG;
163
        if (TCG_TARGET_REG_BITS == 64) {
164
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
165
        } else {
166
            tcg_regset_set32(ct->u.regs, 0, 0xf);
167
        }
168
        break;
169
    case 'r':
170
        ct->ct |= TCG_CT_REG;
171
        if (TCG_TARGET_REG_BITS == 64) {
172
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
173
        } else {
174
            tcg_regset_set32(ct->u.regs, 0, 0xff);
175
        }
176
        break;
177

    
178
        /* qemu_ld/st address constraint */
179
    case 'L':
180
        ct->ct |= TCG_CT_REG;
181
        if (TCG_TARGET_REG_BITS == 64) {
182
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
183
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
184
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
185
        } else {
186
            tcg_regset_set32(ct->u.regs, 0, 0xff);
187
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
188
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
189
        }
190
        break;
191

    
192
    case 'e':
193
        ct->ct |= TCG_CT_CONST_S32;
194
        break;
195
    case 'Z':
196
        ct->ct |= TCG_CT_CONST_U32;
197
        break;
198

    
199
    default:
200
        return -1;
201
    }
202
    ct_str++;
203
    *pct_str = ct_str;
204
    return 0;
205
}
206

    
207
/* test if a constant matches the constraint */
208
static inline int tcg_target_const_match(tcg_target_long val,
209
                                         const TCGArgConstraint *arg_ct)
210
{
211
    int ct = arg_ct->ct;
212
    if (ct & TCG_CT_CONST) {
213
        return 1;
214
    }
215
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
216
        return 1;
217
    }
218
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
219
        return 1;
220
    }
221
    return 0;
222
}
223

    
224
/* Low three bits of a register number, i.e. the part that fits a ModRM or
   SIB field.  On a 64-bit host the fourth bit is carried by the REX prefix
   (see tcg_out_opc); on a 32-bit host the number is used unchanged.  */
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 0x7)
#else
# define LOWREGMASK(x)  (x)
#endif
229

    
230
/* Flags ORed into an opcode value, above its low byte, to request prefix
   bytes from tcg_out_opc.  The flags that only make sense on a 64-bit host
   collapse to 0 elsewhere so that callers may add them unconditionally.  */
#define P_EXT           (1 << 8)        /* 0x0f opcode prefix */
#define P_DATA16        (1 << 9)        /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       (1 << 10)       /* 0x67 opcode prefix */
# define P_REXW         (1 << 11)       /* Set REX.W = 1 */
# define P_REXB_R       (1 << 12)       /* REG field as byte register */
# define P_REXB_RM      (1 << 13)       /* R/M field as byte register */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
#endif
243

    
244
/* Opcode bytes, optionally ORed with the P_* prefix flags.  */

/* Arithmetic and logic.  */
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_TESTL       (0x85)
#define OPC_INC_r32     (0x40)
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */

/* Data movement.  */
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)

/* Stack.  */
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)

/* Control transfer.  */
#define OPC_CALL_Jz     (0xe8)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_RET         (0xc3)

/* Opcode groups; the operation is chosen by the EXT3_ / EXT5_ value.  */
#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)
285

    
286
/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD       0
#define ARITH_OR        1
#define ARITH_ADC       2
#define ARITH_SBB       3
#define ARITH_AND       4
#define ARITH_SUB       5
#define ARITH_XOR       6
#define ARITH_CMP       7
296

    
297
/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL       0
#define SHIFT_ROR       1
#define SHIFT_SHL       4
#define SHIFT_SHR       5
#define SHIFT_SAR       7
303

    
304
/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT        2
#define EXT3_NEG        3
#define EXT3_MUL        4
#define EXT3_IMUL       5
#define EXT3_DIV        6
#define EXT3_IDIV       7
311

    
312
/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4
317

    
318
/* Condition codes to be added to OPC_JCC_{long,short}.  JCC_JMP is not a
   condition code: tcg_out_jxx treats -1 as "emit an unconditional jump".  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
336

    
337
static const uint8_t tcg_cond_to_jcc[10] = {
338
    [TCG_COND_EQ] = JCC_JE,
339
    [TCG_COND_NE] = JCC_JNE,
340
    [TCG_COND_LT] = JCC_JL,
341
    [TCG_COND_GE] = JCC_JGE,
342
    [TCG_COND_LE] = JCC_JLE,
343
    [TCG_COND_GT] = JCC_JG,
344
    [TCG_COND_LTU] = JCC_JB,
345
    [TCG_COND_GEU] = JCC_JAE,
346
    [TCG_COND_LEU] = JCC_JBE,
347
    [TCG_COND_GTU] = JCC_JA,
348
};
349

    
350
#if TCG_TARGET_REG_BITS == 64
351
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
352
{
353
    int rex;
354

    
355
    if (opc & P_DATA16) {
356
        /* We should never be asking for both 16 and 64-bit operation.  */
357
        assert((opc & P_REXW) == 0);
358
        tcg_out8(s, 0x66);
359
    }
360
    if (opc & P_ADDR32) {
361
        tcg_out8(s, 0x67);
362
    }
363

    
364
    rex = 0;
365
    rex |= (opc & P_REXW) >> 8;                /* REX.W */
366
    rex |= (r & 8) >> 1;                /* REX.R */
367
    rex |= (x & 8) >> 2;                /* REX.X */
368
    rex |= (rm & 8) >> 3;                /* REX.B */
369

    
370
    /* P_REXB_{R,RM} indicates that the given register is the low byte.
371
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
372
       as otherwise the encoding indicates %[abcd]h.  Note that the values
373
       that are ORed in merely indicate that the REX byte must be present;
374
       those bits get discarded in output.  */
375
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
376
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
377

    
378
    if (rex) {
379
        tcg_out8(s, (uint8_t)(rex | 0x40));
380
    }
381

    
382
    if (opc & P_EXT) {
383
        tcg_out8(s, 0x0f);
384
    }
385
    tcg_out8(s, opc);
386
}
387
#else
388
static void tcg_out_opc(TCGContext *s, int opc)
389
{
390
    if (opc & P_DATA16) {
391
        tcg_out8(s, 0x66);
392
    }
393
    if (opc & P_EXT) {
394
        tcg_out8(s, 0x0f);
395
    }
396
    tcg_out8(s, opc);
397
}
398
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
399
   the 32-bit compilation paths.  This method works with all versions of gcc,
400
   whereas relying on optimization may not be able to exclude them.  */
401
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
402
#endif
403

    
404
/* Emit OPC followed by a register-direct ModRM byte (mod = 11) with R in
   the REG field and RM in the R/M field.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    int modrm;

    tcg_out_opc(s, opc, r, rm, 0);

    modrm = 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm);
    tcg_out8(s, modrm);
}
409

    
410
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
411
   We handle either RM and INDEX missing with a negative value.  In 64-bit
412
   mode for absolute addresses, ~RM is the size of the immediate operand
413
   that will follow the instruction.  */
414

    
415
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
416
                                     int index, int shift,
417
                                     tcg_target_long offset)
418
{
419
    int mod, len;
420

    
421
    if (index < 0 && rm < 0) {
422
        if (TCG_TARGET_REG_BITS == 64) {
423
            /* Try for a rip-relative addressing mode.  This has replaced
424
               the 32-bit-mode absolute addressing encoding.  */
425
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
426
            tcg_target_long disp = offset - pc;
427
            if (disp == (int32_t)disp) {
428
                tcg_out_opc(s, opc, r, 0, 0);
429
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
430
                tcg_out32(s, disp);
431
                return;
432
            }
433

    
434
            /* Try for an absolute address encoding.  This requires the
435
               use of the MODRM+SIB encoding and is therefore larger than
436
               rip-relative addressing.  */
437
            if (offset == (int32_t)offset) {
438
                tcg_out_opc(s, opc, r, 0, 0);
439
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
440
                tcg_out8(s, (4 << 3) | 5);
441
                tcg_out32(s, offset);
442
                return;
443
            }
444

    
445
            /* ??? The memory isn't directly addressable.  */
446
            tcg_abort();
447
        } else {
448
            /* Absolute address.  */
449
            tcg_out_opc(s, opc, r, 0, 0);
450
            tcg_out8(s, (r << 3) | 5);
451
            tcg_out32(s, offset);
452
            return;
453
        }
454
    }
455

    
456
    /* Find the length of the immediate addend.  Note that the encoding
457
       that would be used for (%ebp) indicates absolute addressing.  */
458
    if (rm < 0) {
459
        mod = 0, len = 4, rm = 5;
460
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
461
        mod = 0, len = 0;
462
    } else if (offset == (int8_t)offset) {
463
        mod = 0x40, len = 1;
464
    } else {
465
        mod = 0x80, len = 4;
466
    }
467

    
468
    /* Use a single byte MODRM format if possible.  Note that the encoding
469
       that would be used for %esp is the escape to the two byte form.  */
470
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
471
        /* Single byte MODRM format.  */
472
        tcg_out_opc(s, opc, r, rm, 0);
473
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
474
    } else {
475
        /* Two byte MODRM+SIB format.  */
476

    
477
        /* Note that the encoding that would place %esp into the index
478
           field indicates no index register.  In 64-bit mode, the REX.X
479
           bit counts, so %r12 can be used as the index.  */
480
        if (index < 0) {
481
            index = 4;
482
        } else {
483
            assert(index != TCG_REG_ESP);
484
        }
485

    
486
        tcg_out_opc(s, opc, r, rm, index);
487
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
488
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
489
    }
490

    
491
    if (len == 1) {
492
        tcg_out8(s, offset);
493
    } else if (len == 4) {
494
        tcg_out32(s, offset);
495
    }
496
}
497

    
498
/* A simplification of the above with no index or shift.  */
499
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
500
                                        int rm, tcg_target_long offset)
501
{
502
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
503
}
504

    
505
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
506
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
507
{
508
    /* Propagate an opcode prefix, such as P_REXW.  */
509
    int ext = subop & ~0x7;
510
    subop &= 0x7;
511

    
512
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
513
}
514

    
515
/* Copy register ARG into register RET; nothing is emitted when they are
   the same register.  TCG_TYPE_I64 selects a 64-bit move via P_REXW.  */
static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    int opc;

    if (arg == ret) {
        return;
    }

    opc = OPC_MOVL_GvEv;
    if (type == TCG_TYPE_I64) {
        opc += P_REXW;
    }
    tcg_out_modrm(s, opc, ret, arg);
}
522

    
523
/* Load the constant ARG into register RET using the shortest form:
     - zero:                          xor ret, ret
     - 32-bit type or fits uint32_t:  mov with a 32-bit immediate
     - fits int32_t:                  REX.W mov with a sign-extended imm32
     - otherwise:                     REX.W mov with a full 64-bit immediate  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }

    if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }

    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out32(s, arg);
    /* High half.  The shift is split in two, presumably so that it stays
       well defined when tcg_target_long is only 32 bits wide.  */
    tcg_out32(s, arg >> 31 >> 1);
}
541

    
542
/* Push the constant VAL, using the one-byte immediate form when it fits
   in int8_t and the four-byte form when it fits in int32_t.  Anything
   wider cannot be encoded and aborts.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
        return;
    }
    if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
        return;
    }
    tcg_abort();
}
554

    
555
/* Push register REG; its low three bits are folded into the opcode byte.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    const int opc = OPC_PUSH_r32 + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
559

    
560
/* Pop into register REG; its low three bits are folded into the opcode
   byte.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    const int opc = OPC_POP_r32 + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
564

    
565
/* Load register RET from memory at ARG1 + ARG2.  TCG_TYPE_I64 selects a
   64-bit load via P_REXW.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv;

    if (type == TCG_TYPE_I64) {
        opc += P_REXW;
    }
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}
571

    
572
/* Store register ARG to memory at ARG1 + ARG2.  TCG_TYPE_I64 selects a
   64-bit store via P_REXW.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv;

    if (type == TCG_TYPE_I64) {
        opc += P_REXW;
    }
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}
578

    
579
/* Shift or rotate register REG by the constant COUNT.  The low three bits
   of SUBOPC are a SHIFT_* code; any higher bits are an opcode prefix, such
   as P_DATA16, and are carried over.  A count of one uses the shorter
   encoding that has no immediate byte.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    const int prefix = subopc & ~0x7;
    const int operation = subopc & 0x7;

    if (count != 1) {
        tcg_out_modrm(s, OPC_SHIFT_Ib + prefix, operation, reg);
        tcg_out8(s, count);
        return;
    }
    tcg_out_modrm(s, OPC_SHIFT_1 + prefix, operation, reg);
}
592

    
593
/* Byte-swap the 32-bit value in register REG.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    const int opc = OPC_BSWAP + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
597

    
598
/* Rotate the low 16 bits of register REG left by eight, which exchanges
   its two low bytes.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    const int rotate16 = SHIFT_ROL + P_DATA16;

    tcg_out_shifti(s, rotate16, reg, 8);
}
602

    
603
/* Zero-extend the low byte of SRC into DEST (movzbl).  On a 32-bit host
   only registers 0-3 have an addressable low byte, hence the assertion.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    const int opc = OPC_MOVZBL + P_REXB_RM;

    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, opc, dest, src);
}
609

    
610
/* Sign-extend the low byte of SRC into DEST (movsbl).  REXW is either 0 or
   P_REXW and is added to the opcode.  On a 32-bit host only registers 0-3
   have an addressable low byte, hence the assertion.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    const int opc = OPC_MOVSBL + P_REXB_RM + rexw;

    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, opc, dest, src);
}
616

    
617
/* Zero-extend the low 16 bits of SRC into DEST (movzwl).  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    const int opc = OPC_MOVZWL;

    tcg_out_modrm(s, opc, dest, src);
}
622

    
623
/* Sign-extend the low 16 bits of SRC into DEST (movsw[lq]).  REXW is
   either 0 or P_REXW and is added to the opcode.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    const int opc = OPC_MOVSWL + rexw;

    tcg_out_modrm(s, opc, dest, src);
}
628

    
629
/* Zero-extend the low 32 bits of SRC into DEST.  A plain 32-bit mov is
   enough because 32-bit mov zero extends.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    const int opc = OPC_MOVL_GvEv;

    tcg_out_modrm(s, opc, dest, src);
}
634

    
635
/* Sign-extend the low 32 bits of SRC into DEST (OPC_MOVSLQ).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    const int opc = OPC_MOVSLQ;

    tcg_out_modrm(s, opc, dest, src);
}
639

    
640
/* Byte-swap the 64-bit value in register REG (bswap with P_REXW).  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    const int opc = OPC_BSWAP + P_REXW + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
644

    
645
static void tgen_arithi(TCGContext *s, int c, int r0,
646
                        tcg_target_long val, int cf)
647
{
648
    int rexw = 0;
649

    
650
    if (TCG_TARGET_REG_BITS == 64) {
651
        rexw = c & -8;
652
        c &= 7;
653
    }
654

    
655
    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
656
       partial flags update stalls on Pentium4 and are not recommended
657
       by current Intel optimization manuals.  */
658
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
659
        _Bool is_inc = (c == ARITH_ADD) ^ (val < 0);
660
        if (TCG_TARGET_REG_BITS == 64) {
661
            /* The single-byte increment encodings are re-tasked as the
662
               REX prefixes.  Use the MODRM encoding.  */
663
            tcg_out_modrm(s, OPC_GRP5 + rexw,
664
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
665
        } else {
666
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
667
        }
668
        return;
669
    }
670

    
671
    if (c == ARITH_AND) {
672
        if (TCG_TARGET_REG_BITS == 64) {
673
            if (val == 0xffffffffu) {
674
                tcg_out_ext32u(s, r0, r0);
675
                return;
676
            }
677
            if (val == (uint32_t)val) {
678
                /* AND with no high bits set can use a 32-bit operation.  */
679
                rexw = 0;
680
            }
681
        }
682
        if (val == 0xffu) {
683
            tcg_out_ext8u(s, r0, r0);
684
            return;
685
        }
686
        if (val == 0xffffu) {
687
            tcg_out_ext16u(s, r0, r0);
688
            return;
689
        }
690
    }
691

    
692
    if (val == (int8_t)val) {
693
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
694
        tcg_out8(s, val);
695
        return;
696
    }
697
    if (rexw == 0 || val == (int32_t)val) {
698
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
699
        tcg_out32(s, val);
700
        return;
701
    }
702

    
703
    tcg_abort();
704
}
705

    
706
/* Add the constant VAL to register REG with P_REXW requested; nothing is
   emitted for a zero addend.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val == 0) {
        return;
    }
    tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
}
712

    
713
/* Use SMALL != 0 to force a short forward branch.  */
714
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
715
{
716
    int32_t val, val1;
717
    TCGLabel *l = &s->labels[label_index];
718

    
719
    if (l->has_value) {
720
        val = l->u.value - (tcg_target_long)s->code_ptr;
721
        val1 = val - 2;
722
        if ((int8_t)val1 == val1) {
723
            if (opc == -1) {
724
                tcg_out8(s, OPC_JMP_short);
725
            } else {
726
                tcg_out8(s, OPC_JCC_short + opc);
727
            }
728
            tcg_out8(s, val1);
729
        } else {
730
            if (small) {
731
                tcg_abort();
732
            }
733
            if (opc == -1) {
734
                tcg_out8(s, OPC_JMP_long);
735
                tcg_out32(s, val - 5);
736
            } else {
737
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
738
                tcg_out32(s, val - 6);
739
            }
740
        }
741
    } else if (small) {
742
        if (opc == -1) {
743
            tcg_out8(s, OPC_JMP_short);
744
        } else {
745
            tcg_out8(s, OPC_JCC_short + opc);
746
        }
747
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
748
        s->code_ptr += 1;
749
    } else {
750
        if (opc == -1) {
751
            tcg_out8(s, OPC_JMP_long);
752
        } else {
753
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
754
        }
755
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
756
        s->code_ptr += 4;
757
    }
758
}
759

    
760
/* Emit a comparison of register ARG1 with ARG2.  ARG2 is a constant when
   CONST_ARG2 is nonzero and a register otherwise.  REXW is 0 or P_REXW.
   A comparison against constant zero uses "test r, r".  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (!const_arg2) {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
        return;
    }

    if (arg2 != 0) {
        tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        return;
    }

    /* test r, r */
    tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
}
774

    
775
/* Compare ARG1 with ARG2 as 32-bit values and jump to LABEL_INDEX when
   COND holds.  SMALL is passed through to tcg_out_jxx.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    int jcc;

    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);

    jcc = tcg_cond_to_jcc[cond];
    tcg_out_jxx(s, jcc, label_index, small);
}
782

    
783
#if TCG_TARGET_REG_BITS == 64
784
/* Compare ARG1 with ARG2 as 64-bit values (P_REXW) and jump to LABEL_INDEX
   when COND holds.  SMALL is passed through to tcg_out_jxx.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    int jcc;

    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);

    jcc = tcg_cond_to_jcc[cond];
    tcg_out_jxx(s, jcc, label_index, small);
}
791
#else
792
/* XXX: we implement it at the target level to avoid having to
793
   handle cross basic blocks temporaries */
794
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
795
                            const int *const_args, int small)
796
{
797
    int label_next;
798
    label_next = gen_new_label();
799
    switch(args[4]) {
800
    case TCG_COND_EQ:
801
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
802
                         label_next, 1);
803
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
804
                         args[5], small);
805
        break;
806
    case TCG_COND_NE:
807
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
808
                         args[5], small);
809
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
810
                         args[5], small);
811
        break;
812
    case TCG_COND_LT:
813
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
814
                         args[5], small);
815
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
816
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
817
                         args[5], small);
818
        break;
819
    case TCG_COND_LE:
820
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
821
                         args[5], small);
822
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
823
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
824
                         args[5], small);
825
        break;
826
    case TCG_COND_GT:
827
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
828
                         args[5], small);
829
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
830
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
831
                         args[5], small);
832
        break;
833
    case TCG_COND_GE:
834
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
835
                         args[5], small);
836
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
837
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
838
                         args[5], small);
839
        break;
840
    case TCG_COND_LTU:
841
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
842
                         args[5], small);
843
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
844
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
845
                         args[5], small);
846
        break;
847
    case TCG_COND_LEU:
848
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
849
                         args[5], small);
850
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
851
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
852
                         args[5], small);
853
        break;
854
    case TCG_COND_GTU:
855
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
856
                         args[5], small);
857
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
858
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
859
                         args[5], small);
860
        break;
861
    case TCG_COND_GEU:
862
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
863
                         args[5], small);
864
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
865
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
866
                         args[5], small);
867
        break;
868
    default:
869
        tcg_abort();
870
    }
871
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
872
}
873
#endif
874

    
875
/* Set register DEST to 1 if the 32-bit comparison "ARG1 COND ARG2" holds
   and to 0 otherwise: compare, SETcc into the low byte of DEST, then
   zero-extend that byte.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    int setcc;

    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);

    setcc = OPC_SETCC | tcg_cond_to_jcc[cond];
    tcg_out_modrm(s, setcc, 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
882

    
883
#if TCG_TARGET_REG_BITS == 64
884
/* Set register DEST to 1 if the 64-bit comparison "ARG1 COND ARG2" holds
   and to 0 otherwise: compare with P_REXW, SETcc into the low byte of
   DEST, then zero-extend that byte.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    int setcc;

    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);

    setcc = OPC_SETCC | tcg_cond_to_jcc[cond];
    tcg_out_modrm(s, setcc, 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
891
#else
892
/* Double-word setcond for 32-bit hosts, built on top of tcg_out_brcond2.

   ARGS[0] is the destination; ARGS[1..5] are handed to tcg_out_brcond2 as
   its operands 0..4 (the last of these being the condition), and a freshly
   allocated label is supplied as its operand 5.  CONST_ARGS is shifted by
   one in the same way.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    const TCGArg dest = args[0];
    TCGArg br_args[6];
    int dest_is_input;
    int label_true, label_over;
    int i;

    /* Slot 5 (the branch target) is filled in below.  */
    for (i = 0; i < 5; i++) {
        br_args[i] = args[i + 1];
    }

    /* Operands 3 and 4 only name a register when they are not constants.  */
    dest_is_input = (dest == args[1] || dest == args[2]
                     || (!const_args[3] && dest == args[3])
                     || (!const_args[4] && dest == args[4]));

    if (!dest_is_input) {
        /* The destination is independent of the inputs: zero it up front,
           branch past the increment when the inverted condition holds, and
           otherwise bump it to 1.  This is the shorter sequence.  */
        tcg_out_movi(s, TCG_TYPE_I32, dest, 0);

        label_over = gen_new_label();
        br_args[4] = tcg_invert_cond(br_args[4]);
        br_args[5] = label_over;
        tcg_out_brcond2(s, br_args, const_args + 1, 1);

        tgen_arithi(s, ARITH_ADD, dest, 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
        return;
    }

    /* The destination is also an input, so it must not be written before
       the comparison: branch to the "true" arm and load 0 or 1 afterwards.  */
    label_true = gen_new_label();
    label_over = gen_new_label();

    br_args[5] = label_true;
    tcg_out_brcond2(s, br_args, const_args + 1, 1);

    /* False arm.  */
    tcg_out_movi(s, TCG_TYPE_I32, dest, 0);
    tcg_out_jxx(s, JCC_JMP, label_over, 1);

    /* True arm.  */
    tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_I32, dest, 1);

    tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
}
933
#endif
934

    
935
/* Emit a jump (CALL == 0) or a call (CALL != 0) to the absolute address
   DEST.  A direct branch with a 32-bit displacement is used when DEST is in
   range; otherwise DEST is loaded into R10 and the branch goes through that
   register.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    /* Displacement for the direct form; the 5 is subtracted on top of the
       current output position before the opcode and 32-bit displacement
       are emitted below.  */
    const tcg_target_long rel = dest - (tcg_target_long)s->code_ptr - 5;

    if (rel != (int32_t)rel) {
        /* Does not fit in 32 bits: go indirect via R10.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        if (call) {
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, TCG_REG_R10);
        } else {
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_R10);
        }
        return;
    }

    if (call) {
        tcg_out_opc(s, OPC_CALL_Jz, 0, 0, 0);
    } else {
        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
    }
    tcg_out32(s, rel);
}
948

    
949
/* Emit a call to the absolute address DEST (tcg_out_branch in call mode).  */
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    const int is_call = 1;

    tcg_out_branch(s, is_call, dest);
}
953

    
954
/* Emit a jump to the absolute address DEST (tcg_out_branch in jump mode).  */
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    const int is_call = 0;

    tcg_out_branch(s, is_call, dest);
}
958

    
959
#if defined(CONFIG_SOFTMMU)
960

    
961
#include "../../softmmu_defs.h"
962

    
963
static void *qemu_ld_helpers[4] = {
964
    __ldb_mmu,
965
    __ldw_mmu,
966
    __ldl_mmu,
967
    __ldq_mmu,
968
};
969

    
970
static void *qemu_st_helpers[4] = {
971
    __stb_mmu,
972
    __stw_mmu,
973
    __stl_mmu,
974
    __stq_mmu,
975
};
976

    
977
/* Perform the TLB load and compare.
978

979
   Inputs:
980
   ADDRLO_IDX contains the index into ARGS of the low part of the
981
   address; the high part of the address is at ADDR_LOW_IDX+1.
982

983
   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
984

985
   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
986
   This should be offsetof addr_read or addr_write.
987

988
   Outputs:
989
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
990
   positions of the displacements of forward jumps to the TLB miss case.
991

992
   First argument register is loaded with the low part of the address.
993
   In the TLB hit case, it has been adjusted as indicated by the TLB
994
   and so is a host address.  In the TLB miss case, it continues to
995
   hold a guest address.
996

997
   Second argument register is clobbered.  */
998

    
999
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1000
                                    int mem_index, int s_bits,
1001
                                    const TCGArg *args,
1002
                                    uint8_t **label_ptr, int which)
1003
{
1004
    const int addrlo = args[addrlo_idx];
1005
    const int r0 = tcg_target_call_iarg_regs[0];
1006
    const int r1 = tcg_target_call_iarg_regs[1];
1007
    TCGType type = TCG_TYPE_I32;
1008
    int rexw = 0;
1009

    
1010
    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1011
        type = TCG_TYPE_I64;
1012
        rexw = P_REXW;
1013
    }
1014

    
1015
    tcg_out_mov(s, type, r1, addrlo);
1016
    tcg_out_mov(s, type, r0, addrlo);
1017

    
1018
    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
1019
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1020

    
1021
    tgen_arithi(s, ARITH_AND + rexw, r0,
1022
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1023
    tgen_arithi(s, ARITH_AND + rexw, r1,
1024
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1025

    
1026
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
1027
                             offsetof(CPUState, tlb_table[mem_index][0])
1028
                             + which);
1029

    
1030
    /* cmp 0(r1), r0 */
1031
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
1032

    
1033
    tcg_out_mov(s, type, r0, addrlo);
1034

    
1035
    /* jne label1 */
1036
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
1037
    label_ptr[0] = s->code_ptr;
1038
    s->code_ptr++;
1039

    
1040
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1041
        /* cmp 4(r1), addrhi */
1042
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
1043

    
1044
        /* jne label1 */
1045
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
1046
        label_ptr[1] = s->code_ptr;
1047
        s->code_ptr++;
1048
    }
1049

    
1050
    /* TLB Hit.  */
1051

    
1052
    /* add addend(r1), r0 */
1053
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
1054
                         offsetof(CPUTLBEntry, addend) - which);
1055
}
1056
#endif
1057

    
1058
/* Emit the actual guest load from host address BASE + OFS.

   SIZEOP holds the log2 access size in its low two bits; bit 2 (value 4)
   requests sign extension.  The result goes to DATALO, and for a 64-bit
   load on a 32-bit host also to DATAHI.  When the guest is big-endian
   (TARGET_WORDS_BIGENDIAN) the loaded value is byte-swapped.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifndef TARGET_WORDS_BIGENDIAN
    const int bswap = 0;
#else
    const int bswap = 1;
#endif

    switch (sizeop) {
    case 0:
        /* 8-bit, zero-extended.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;

    case 0 | 4:
        /* 8-bit, sign-extended.  */
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;

    case 1:
        /* 16-bit, zero-extended; swap the two bytes if needed.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;

    case 1 | 4:
        /* 16-bit, sign-extended.  With a byte swap the sign extension has
           to happen after the swap, so load zero-extended first.  */
        if (!bswap) {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        }
        break;

    case 2:
        /* 32-bit.  */
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        /* 32-bit, sign-extended to 64 bits.  */
        if (!bswap) {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        } else {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        }
        break;
#endif

    case 3:
        /* 64-bit.  */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
            break;
        }

        /* 32-bit host: two 32-bit loads.  For a byte-swapped load the
           two halves trade places as well.  */
        if (bswap) {
            const int tmp = datahi;
            datahi = datalo;
            datalo = tmp;
        }
        /* If BASE is also the low destination, load the high half first
           so the address is still intact for the second load.  */
        if (base == datalo) {
            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        } else {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        if (bswap) {
            tcg_out_bswap32(s, datalo);
            tcg_out_bswap32(s, datahi);
        }
        break;

    default:
        tcg_abort();
    }
}
1134

    
1135
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1136
   EAX. It will be useful once fixed registers globals are less
1137
   common. */
1138
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1139
                            int opc)
1140
{
1141
    int data_reg, data_reg2 = 0;
1142
    int addrlo_idx;
1143
#if defined(CONFIG_SOFTMMU)
1144
    int mem_index, s_bits, arg_idx;
1145
    uint8_t *label_ptr[3];
1146
#endif
1147

    
1148
    data_reg = args[0];
1149
    addrlo_idx = 1;
1150
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1151
        data_reg2 = args[1];
1152
        addrlo_idx = 2;
1153
    }
1154

    
1155
#if defined(CONFIG_SOFTMMU)
1156
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1157
    s_bits = opc & 3;
1158

    
1159
    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1160
                     label_ptr, offsetof(CPUTLBEntry, addr_read));
1161

    
1162
    /* TLB Hit.  */
1163
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
1164
                           tcg_target_call_iarg_regs[0], 0, opc);
1165

    
1166
    /* jmp label2 */
1167
    tcg_out8(s, OPC_JMP_short);
1168
    label_ptr[2] = s->code_ptr;
1169
    s->code_ptr++;
1170

    
1171
    /* TLB Miss.  */
1172

    
1173
    /* label1: */
1174
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1175
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1176
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1177
    }
1178

    
1179
    /* XXX: move that code at the end of the TB */
1180
    /* The first argument is already loaded with addrlo.  */
1181
    arg_idx = 1;
1182
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
1183
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
1184
                    args[addrlo_idx + 1]);
1185
    }
1186
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
1187
                 mem_index);
1188
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1189

    
1190
    switch(opc) {
1191
    case 0 | 4:
1192
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1193
        break;
1194
    case 1 | 4:
1195
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1196
        break;
1197
    case 0:
1198
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
1199
        break;
1200
    case 1:
1201
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
1202
        break;
1203
    case 2:
1204
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1205
        break;
1206
#if TCG_TARGET_REG_BITS == 64
1207
    case 2 | 4:
1208
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1209
        break;
1210
#endif
1211
    case 3:
1212
        if (TCG_TARGET_REG_BITS == 64) {
1213
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1214
        } else if (data_reg == TCG_REG_EDX) {
1215
            /* xchg %edx, %eax */
1216
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1217
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
1218
        } else {
1219
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1220
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
1221
        }
1222
        break;
1223
    default:
1224
        tcg_abort();
1225
    }
1226

    
1227
    /* label2: */
1228
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1229
#else
1230
    {
1231
        int32_t offset = GUEST_BASE;
1232
        int base = args[addrlo_idx];
1233

    
1234
        if (TCG_TARGET_REG_BITS == 64) {
1235
            /* ??? We assume all operations have left us with register
1236
               contents that are zero extended.  So far this appears to
1237
               be true.  If we want to enforce this, we can either do
1238
               an explicit zero-extension here, or (if GUEST_BASE == 0)
1239
               use the ADDR32 prefix.  For now, do nothing.  */
1240

    
1241
            if (offset != GUEST_BASE) {
1242
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
1243
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
1244
                base = TCG_REG_RDI, offset = 0;
1245
            }
1246
        }
1247

    
1248
        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
1249
    }
1250
#endif
1251
}
1252

    
1253
/* Emit the actual guest store to host address BASE + OFS.

   SIZEOP is the log2 of the access size.  The value comes from DATALO,
   and for a 64-bit store on a 32-bit host also from DATAHI.  When the
   guest is big-endian (TARGET_WORDS_BIGENDIAN) the value is byte-swapped
   in a scratch register before being stored.  */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifndef TARGET_WORDS_BIGENDIAN
    const int bswap = 0;
#else
    const int bswap = 1;
#endif
    /* ??? Ideally no scratch register would be needed.  For user-only,
       the bswap could be performed twice to restore the original value
       instead of copying to a scratch.  As things stand, the L constraint
       guarantees that the second argument register is free here.  */
    const int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        /* 8-bit store.  */
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;

    case 1:
        /* 16-bit store.  */
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;

    case 2:
        /* 32-bit store.  */
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;

    case 3:
        /* 64-bit store.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (!bswap) {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        } else {
            /* Swapped: the high half goes to the lower address, each
               half byte-swapped through the scratch register.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        }
        break;

    default:
        tcg_abort();
    }
}
1311

    
1312
/* Emit a guest memory store.

   ARGS holds the data register (two registers for a 64-bit store on a
   32-bit host), then the guest address (low part, plus high part when the
   guest address is wider than a host register), then, with
   CONFIG_SOFTMMU, the memory index.  OPC is the log2 of the access
   size.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int lo_reg = args[0];
    int hi_reg = 0;
    int addrlo_idx = 1;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        /* 64-bit data on a 32-bit host: a second data register precedes
           the address operands.  */
        hi_reg = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit: store straight to the address left in the first
       call-argument register.  */
    tcg_out_qemu_st_direct(s, lo_reg, hi_reg,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 -- displacement byte patched at the end.  */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr++;

    /* TLB Miss.  */

    /* label1: resolve the forward jump(s) emitted by tcg_out_tlb_load.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* Set up the remaining helper arguments.  STACK_ADJUST counts the
       bytes pushed here, which are removed again after the call.  */
    stack_adjust = 0;
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, lo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
    } else if (TARGET_LONG_BITS == 32) {
        /* 32-bit host, 32-bit guest address.  */
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, lo_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, hi_reg);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
        }
    } else {
        /* 32-bit host, 64-bit guest address: the high address part goes
           in EDX in either case.  */
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
        if (opc == 3) {
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, hi_reg);
            tcg_out_push(s, lo_reg);
            stack_adjust = 12;
        } else {
            switch (opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, lo_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, lo_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, lo_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

    /* label2: resolve the jump that skips the miss path.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            /* GUEST_BASE did not survive the narrowing to 32 bits, so it
               cannot be used as a displacement: form the full host
               address in RDI instead.  */
            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI;
                offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, lo_reg, hi_reg, base, offset, opc);
    }
#endif
}
1429

    
1430
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1431
                              const TCGArg *args, const int *const_args)
1432
{
1433
    int c, rexw = 0;
1434

    
1435
#if TCG_TARGET_REG_BITS == 64
1436
# define OP_32_64(x) \
1437
        case glue(glue(INDEX_op_, x), _i64): \
1438
            rexw = P_REXW; /* FALLTHRU */    \
1439
        case glue(glue(INDEX_op_, x), _i32)
1440
#else
1441
# define OP_32_64(x) \
1442
        case glue(glue(INDEX_op_, x), _i32)
1443
#endif
1444

    
1445
    switch(opc) {
1446
    case INDEX_op_exit_tb:
1447
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1448
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1449
        break;
1450
    case INDEX_op_goto_tb:
1451
        if (s->tb_jmp_offset) {
1452
            /* direct jump method */
1453
            tcg_out8(s, OPC_JMP_long); /* jmp im */
1454
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1455
            tcg_out32(s, 0);
1456
        } else {
1457
            /* indirect jump method */
1458
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1459
                                 (tcg_target_long)(s->tb_next + args[0]));
1460
        }
1461
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1462
        break;
1463
    case INDEX_op_call:
1464
        if (const_args[0]) {
1465
            tcg_out_calli(s, args[0]);
1466
        } else {
1467
            /* call *reg */
1468
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1469
        }
1470
        break;
1471
    case INDEX_op_jmp:
1472
        if (const_args[0]) {
1473
            tcg_out_jmp(s, args[0]);
1474
        } else {
1475
            /* jmp *reg */
1476
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
1477
        }
1478
        break;
1479
    case INDEX_op_br:
1480
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
1481
        break;
1482
    case INDEX_op_movi_i32:
1483
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1484
        break;
1485
    OP_32_64(ld8u):
1486
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1487
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1488
        break;
1489
    OP_32_64(ld8s):
1490
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1491
        break;
1492
    OP_32_64(ld16u):
1493
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
1494
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1495
        break;
1496
    OP_32_64(ld16s):
1497
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1498
        break;
1499
#if TCG_TARGET_REG_BITS == 64
1500
    case INDEX_op_ld32u_i64:
1501
#endif
1502
    case INDEX_op_ld_i32:
1503
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1504
        break;
1505

    
1506
    OP_32_64(st8):
1507
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1508
                             args[0], args[1], args[2]);
1509
        break;
1510
    OP_32_64(st16):
1511
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1512
                             args[0], args[1], args[2]);
1513
        break;
1514
#if TCG_TARGET_REG_BITS == 64
1515
    case INDEX_op_st32_i64:
1516
#endif
1517
    case INDEX_op_st_i32:
1518
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1519
        break;
1520

    
1521
    OP_32_64(add):
1522
        /* For 3-operand addition, use LEA.  */
1523
        if (args[0] != args[1]) {
1524
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1525

    
1526
            if (const_args[2]) {
1527
                c3 = a2, a2 = -1;
1528
            } else if (a0 == a2) {
1529
                /* Watch out for dest = src + dest, since we've removed
1530
                   the matching constraint on the add.  */
1531
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1532
                break;
1533
            }
1534

    
1535
            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1536
            break;
1537
        }
1538
        c = ARITH_ADD;
1539
        goto gen_arith;
1540
    OP_32_64(sub):
1541
        c = ARITH_SUB;
1542
        goto gen_arith;
1543
    OP_32_64(and):
1544
        c = ARITH_AND;
1545
        goto gen_arith;
1546
    OP_32_64(or):
1547
        c = ARITH_OR;
1548
        goto gen_arith;
1549
    OP_32_64(xor):
1550
        c = ARITH_XOR;
1551
        goto gen_arith;
1552
    gen_arith:
1553
        if (const_args[2]) {
1554
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
1555
        } else {
1556
            tgen_arithr(s, c + rexw, args[0], args[2]);
1557
        }
1558
        break;
1559

    
1560
    OP_32_64(mul):
1561
        if (const_args[2]) {
1562
            int32_t val;
1563
            val = args[2];
1564
            if (val == (int8_t)val) {
1565
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1566
                tcg_out8(s, val);
1567
            } else {
1568
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1569
                tcg_out32(s, val);
1570
            }
1571
        } else {
1572
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1573
        }
1574
        break;
1575

    
1576
    OP_32_64(div2):
1577
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1578
        break;
1579
    OP_32_64(divu2):
1580
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1581
        break;
1582

    
1583
    OP_32_64(shl):
1584
        c = SHIFT_SHL;
1585
        goto gen_shift;
1586
    OP_32_64(shr):
1587
        c = SHIFT_SHR;
1588
        goto gen_shift;
1589
    OP_32_64(sar):
1590
        c = SHIFT_SAR;
1591
        goto gen_shift;
1592
    OP_32_64(rotl):
1593
        c = SHIFT_ROL;
1594
        goto gen_shift;
1595
    OP_32_64(rotr):
1596
        c = SHIFT_ROR;
1597
        goto gen_shift;
1598
    gen_shift:
1599
        if (const_args[2]) {
1600
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
1601
        } else {
1602
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1603
        }
1604
        break;
1605

    
1606
    case INDEX_op_brcond_i32:
1607
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1608
                         args[3], 0);
1609
        break;
1610
    case INDEX_op_setcond_i32:
1611
        tcg_out_setcond32(s, args[3], args[0], args[1],
1612
                          args[2], const_args[2]);
1613
        break;
1614

    
1615
    OP_32_64(bswap16):
1616
        tcg_out_rolw_8(s, args[0]);
1617
        break;
1618
    OP_32_64(bswap32):
1619
        tcg_out_bswap32(s, args[0]);
1620
        break;
1621

    
1622
    OP_32_64(neg):
1623
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1624
        break;
1625
    OP_32_64(not):
1626
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1627
        break;
1628

    
1629
    OP_32_64(ext8s):
1630
        tcg_out_ext8s(s, args[0], args[1], rexw);
1631
        break;
1632
    OP_32_64(ext16s):
1633
        tcg_out_ext16s(s, args[0], args[1], rexw);
1634
        break;
1635
    OP_32_64(ext8u):
1636
        tcg_out_ext8u(s, args[0], args[1]);
1637
        break;
1638
    OP_32_64(ext16u):
1639
        tcg_out_ext16u(s, args[0], args[1]);
1640
        break;
1641

    
1642
    case INDEX_op_qemu_ld8u:
1643
        tcg_out_qemu_ld(s, args, 0);
1644
        break;
1645
    case INDEX_op_qemu_ld8s:
1646
        tcg_out_qemu_ld(s, args, 0 | 4);
1647
        break;
1648
    case INDEX_op_qemu_ld16u:
1649
        tcg_out_qemu_ld(s, args, 1);
1650
        break;
1651
    case INDEX_op_qemu_ld16s:
1652
        tcg_out_qemu_ld(s, args, 1 | 4);
1653
        break;
1654
#if TCG_TARGET_REG_BITS == 64
1655
    case INDEX_op_qemu_ld32u:
1656
#endif
1657
    case INDEX_op_qemu_ld32:
1658
        tcg_out_qemu_ld(s, args, 2);
1659
        break;
1660
    case INDEX_op_qemu_ld64:
1661
        tcg_out_qemu_ld(s, args, 3);
1662
        break;
1663

    
1664
    case INDEX_op_qemu_st8:
1665
        tcg_out_qemu_st(s, args, 0);
1666
        break;
1667
    case INDEX_op_qemu_st16:
1668
        tcg_out_qemu_st(s, args, 1);
1669
        break;
1670
    case INDEX_op_qemu_st32:
1671
        tcg_out_qemu_st(s, args, 2);
1672
        break;
1673
    case INDEX_op_qemu_st64:
1674
        tcg_out_qemu_st(s, args, 3);
1675
        break;
1676

    
1677
#if TCG_TARGET_REG_BITS == 32
1678
    case INDEX_op_brcond2_i32:
1679
        tcg_out_brcond2(s, args, const_args, 0);
1680
        break;
1681
    case INDEX_op_setcond2_i32:
1682
        tcg_out_setcond2(s, args, const_args);
1683
        break;
1684
    case INDEX_op_mulu2_i32:
1685
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1686
        break;
1687
    case INDEX_op_add2_i32:
1688
        if (const_args[4]) {
1689
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
1690
        } else {
1691
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
1692
        }
1693
        if (const_args[5]) {
1694
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
1695
        } else {
1696
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
1697
        }
1698
        break;
1699
    case INDEX_op_sub2_i32:
1700
        if (const_args[4]) {
1701
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
1702
        } else {
1703
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
1704
        }
1705
        if (const_args[5]) {
1706
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
1707
        } else {
1708
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
1709
        }
1710
        break;
1711
#else /* TCG_TARGET_REG_BITS == 64 */
1712
    case INDEX_op_movi_i64:
1713
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1714
        break;
1715
    case INDEX_op_ld32s_i64:
1716
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1717
        break;
1718
    case INDEX_op_ld_i64:
1719
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1720
        break;
1721
    case INDEX_op_st_i64:
1722
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1723
        break;
1724
    case INDEX_op_qemu_ld32s:
1725
        tcg_out_qemu_ld(s, args, 2 | 4);
1726
        break;
1727

    
1728
    case INDEX_op_brcond_i64:
1729
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1730
                         args[3], 0);
1731
        break;
1732
    case INDEX_op_setcond_i64:
1733
        tcg_out_setcond64(s, args[3], args[0], args[1],
1734
                          args[2], const_args[2]);
1735
        break;
1736

    
1737
    case INDEX_op_bswap64_i64:
1738
        tcg_out_bswap64(s, args[0]);
1739
        break;
1740
    case INDEX_op_ext32u_i64:
1741
        tcg_out_ext32u(s, args[0], args[1]);
1742
        break;
1743
    case INDEX_op_ext32s_i64:
1744
        tcg_out_ext32s(s, args[0], args[1]);
1745
        break;
1746
#endif
1747

    
1748
    default:
1749
        tcg_abort();
1750
    }
1751

    
1752
#undef OP_32_64
1753
}
1754

    
1755
static const TCGTargetOpDef x86_op_defs[] = {
1756
    { INDEX_op_exit_tb, { } },
1757
    { INDEX_op_goto_tb, { } },
1758
    { INDEX_op_call, { "ri" } },
1759
    { INDEX_op_jmp, { "ri" } },
1760
    { INDEX_op_br, { } },
1761
    { INDEX_op_mov_i32, { "r", "r" } },
1762
    { INDEX_op_movi_i32, { "r" } },
1763
    { INDEX_op_ld8u_i32, { "r", "r" } },
1764
    { INDEX_op_ld8s_i32, { "r", "r" } },
1765
    { INDEX_op_ld16u_i32, { "r", "r" } },
1766
    { INDEX_op_ld16s_i32, { "r", "r" } },
1767
    { INDEX_op_ld_i32, { "r", "r" } },
1768
    { INDEX_op_st8_i32, { "q", "r" } },
1769
    { INDEX_op_st16_i32, { "r", "r" } },
1770
    { INDEX_op_st_i32, { "r", "r" } },
1771

    
1772
    { INDEX_op_add_i32, { "r", "r", "ri" } },
1773
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
1774
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
1775
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
1776
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
1777
    { INDEX_op_and_i32, { "r", "0", "ri" } },
1778
    { INDEX_op_or_i32, { "r", "0", "ri" } },
1779
    { INDEX_op_xor_i32, { "r", "0", "ri" } },
1780

    
1781
    { INDEX_op_shl_i32, { "r", "0", "ci" } },
1782
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
1783
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
1784
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
1785
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },
1786

    
1787
    { INDEX_op_brcond_i32, { "r", "ri" } },
1788

    
1789
    { INDEX_op_bswap16_i32, { "r", "0" } },
1790
    { INDEX_op_bswap32_i32, { "r", "0" } },
1791

    
1792
    { INDEX_op_neg_i32, { "r", "0" } },
1793

    
1794
    { INDEX_op_not_i32, { "r", "0" } },
1795

    
1796
    { INDEX_op_ext8s_i32, { "r", "q" } },
1797
    { INDEX_op_ext16s_i32, { "r", "r" } },
1798
    { INDEX_op_ext8u_i32, { "r", "q" } },
1799
    { INDEX_op_ext16u_i32, { "r", "r" } },
1800

    
1801
    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
1802

    
1803
#if TCG_TARGET_REG_BITS == 32
1804
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
1805
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1806
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1807
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
1808
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
1809
#else
1810
    { INDEX_op_mov_i64, { "r", "r" } },
1811
    { INDEX_op_movi_i64, { "r" } },
1812
    { INDEX_op_ld8u_i64, { "r", "r" } },
1813
    { INDEX_op_ld8s_i64, { "r", "r" } },
1814
    { INDEX_op_ld16u_i64, { "r", "r" } },
1815
    { INDEX_op_ld16s_i64, { "r", "r" } },
1816
    { INDEX_op_ld32u_i64, { "r", "r" } },
1817
    { INDEX_op_ld32s_i64, { "r", "r" } },
1818
    { INDEX_op_ld_i64, { "r", "r" } },
1819
    { INDEX_op_st8_i64, { "r", "r" } },
1820
    { INDEX_op_st16_i64, { "r", "r" } },
1821
    { INDEX_op_st32_i64, { "r", "r" } },
1822
    { INDEX_op_st_i64, { "r", "r" } },
1823

    
1824
    { INDEX_op_add_i64, { "r", "0", "re" } },
1825
    { INDEX_op_mul_i64, { "r", "0", "re" } },
1826
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
1827
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
1828
    { INDEX_op_sub_i64, { "r", "0", "re" } },
1829
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
1830
    { INDEX_op_or_i64, { "r", "0", "re" } },
1831
    { INDEX_op_xor_i64, { "r", "0", "re" } },
1832

    
1833
    { INDEX_op_shl_i64, { "r", "0", "ci" } },
1834
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
1835
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
1836
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
1837
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },
1838

    
1839
    { INDEX_op_brcond_i64, { "r", "re" } },
1840
    { INDEX_op_setcond_i64, { "r", "r", "re" } },
1841

    
1842
    { INDEX_op_bswap16_i64, { "r", "0" } },
1843
    { INDEX_op_bswap32_i64, { "r", "0" } },
1844
    { INDEX_op_bswap64_i64, { "r", "0" } },
1845
    { INDEX_op_neg_i64, { "r", "0" } },
1846
    { INDEX_op_not_i64, { "r", "0" } },
1847

    
1848
    { INDEX_op_ext8s_i64, { "r", "r" } },
1849
    { INDEX_op_ext16s_i64, { "r", "r" } },
1850
    { INDEX_op_ext32s_i64, { "r", "r" } },
1851
    { INDEX_op_ext8u_i64, { "r", "r" } },
1852
    { INDEX_op_ext16u_i64, { "r", "r" } },
1853
    { INDEX_op_ext32u_i64, { "r", "r" } },
1854
#endif
1855

    
1856
#if TCG_TARGET_REG_BITS == 64
1857
    { INDEX_op_qemu_ld8u, { "r", "L" } },
1858
    { INDEX_op_qemu_ld8s, { "r", "L" } },
1859
    { INDEX_op_qemu_ld16u, { "r", "L" } },
1860
    { INDEX_op_qemu_ld16s, { "r", "L" } },
1861
    { INDEX_op_qemu_ld32, { "r", "L" } },
1862
    { INDEX_op_qemu_ld32u, { "r", "L" } },
1863
    { INDEX_op_qemu_ld32s, { "r", "L" } },
1864
    { INDEX_op_qemu_ld64, { "r", "L" } },
1865

    
1866
    { INDEX_op_qemu_st8, { "L", "L" } },
1867
    { INDEX_op_qemu_st16, { "L", "L" } },
1868
    { INDEX_op_qemu_st32, { "L", "L" } },
1869
    { INDEX_op_qemu_st64, { "L", "L" } },
1870
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1871
    { INDEX_op_qemu_ld8u, { "r", "L" } },
1872
    { INDEX_op_qemu_ld8s, { "r", "L" } },
1873
    { INDEX_op_qemu_ld16u, { "r", "L" } },
1874
    { INDEX_op_qemu_ld16s, { "r", "L" } },
1875
    { INDEX_op_qemu_ld32, { "r", "L" } },
1876
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
1877

    
1878
    { INDEX_op_qemu_st8, { "cb", "L" } },
1879
    { INDEX_op_qemu_st16, { "L", "L" } },
1880
    { INDEX_op_qemu_st32, { "L", "L" } },
1881
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
1882
#else
1883
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
1884
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
1885
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
1886
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
1887
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
1888
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
1889

    
1890
    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
1891
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
1892
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
1893
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
1894
#endif
1895
    { -1 },
1896
};
1897

    
1898
static int tcg_target_callee_save_regs[] = {
1899
#if TCG_TARGET_REG_BITS == 64
1900
    TCG_REG_RBP,
1901
    TCG_REG_RBX,
1902
    TCG_REG_R12,
1903
    TCG_REG_R13,
1904
    /* TCG_REG_R14, */ /* Currently used for the global env. */
1905
    TCG_REG_R15,
1906
#else
1907
    /* TCG_REG_EBP, */ /* Currently used for the global env. */
1908
    TCG_REG_EBX,
1909
    TCG_REG_ESI,
1910
    TCG_REG_EDI,
1911
#endif
1912
};
1913

    
1914
/* Generate global QEMU prologue and epilogue code */
1915
static void tcg_target_qemu_prologue(TCGContext *s)
1916
{
1917
    int i, frame_size, push_size, stack_addend;
1918

    
1919
    /* TB prologue */
1920

    
1921
    /* Save all callee saved registers.  */
1922
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
1923
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
1924
    }
1925

    
1926
    /* Reserve some stack space.  */
1927
    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
1928
    push_size *= TCG_TARGET_REG_BITS / 8;
1929

    
1930
    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
1931
    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
1932
        ~(TCG_TARGET_STACK_ALIGN - 1);
1933
    stack_addend = frame_size - push_size;
1934
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
1935

    
1936
    /* jmp *tb.  */
1937
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
1938

    
1939
    /* TB epilogue */
1940
    tb_ret_addr = s->code_ptr;
1941

    
1942
    tcg_out_addi(s, TCG_REG_ESP, stack_addend);
1943

    
1944
    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
1945
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
1946
    }
1947
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
1948
}
1949

    
1950
/*
 * One-time backend initialisation: sanity-check the TLB entry size, fill in
 * the available / call-clobbered / reserved register sets, and register the
 * operand-constraint table x86_op_defs.
 */
static void tcg_target_init(TCGContext *s)
{
    /* Registers marked call-clobbered regardless of TCG_TARGET_REG_BITS. */
    static const int clobbered_always[] = {
        TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX,
    };
    /* Additional registers marked call-clobbered when REG_BITS == 64. */
    static const int clobbered_64[] = {
        TCG_REG_RDI, TCG_REG_RSI,
        TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
    };
    int k;

#ifndef CONFIG_USER_ONLY
    /* fail safe: CPUTLBEntry must be exactly 1 << CPU_TLB_ENTRY_BITS bytes */
    if (sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)) {
        tcg_abort();
    }
#endif

    /* Registers usable for each value type. */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    /* Registers a call may clobber. */
    tcg_regset_clear(tcg_target_call_clobber_regs);
    for (k = 0; k < (int)ARRAY_SIZE(clobbered_always); k++) {
        tcg_regset_set_reg(tcg_target_call_clobber_regs, clobbered_always[k]);
    }
    if (TCG_TARGET_REG_BITS == 64) {
        for (k = 0; k < (int)ARRAY_SIZE(clobbered_64); k++) {
            tcg_regset_set_reg(tcg_target_call_clobber_regs, clobbered_64[k]);
        }
    }

    /* The stack pointer is the only reserved register. */
    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}