Statistics
| Branch: | Revision:

root / tcg / i386 / tcg-target.c @ 6a18ae2d

History | View | Annotate | Download (61.2 kB)

1
/*
2
 * Tiny Code Generator for QEMU
3
 *
4
 * Copyright (c) 2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
#ifndef NDEBUG
/* Human-readable register names, indexed by TCG register number.
   Used only by debug dumps, hence the NDEBUG guard.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
35

    
36
/* Register allocation preference order: earlier entries are tried
   first.  The callee-saved registers (%rbp, %rbx, %r12-%r15 per the
   SysV x86-64 ABI) lead the list so allocated values are more likely
   to survive helper calls; the call-clobbered argument registers and
   %rax come last.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};
63

    
64
/* Registers used to pass integer call arguments, in argument order.
   The 64-bit list is the SysV x86-64 convention.  The 32-bit list
   (%eax, %edx, %ecx) presumably matches a regparm-style convention --
   NOTE(review): confirm against the call emission code.  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};
78

    
79
/* Registers holding call return values: %eax/%rax, plus %edx for the
   high half of a 64-bit result on a 32-bit host.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};
85

    
86
/* Code address that translation blocks return to; assigned during
   prologue generation (not visible in this chunk) -- TODO confirm.  */
static uint8_t *tb_ret_addr;
87

    
88
/* Resolve a relocation previously registered with tcg_out_reloc:
   patch the pc-relative displacement at CODE_PTR so it reaches
   VALUE + ADDEND.  Aborts if the displacement does not fit the
   relocation width.  */
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        /* Must fit in a signed 32-bit displacement.  */
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        /* Short-branch displacement must fit in 8 bits.  */
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
111

    
112
/* maximum number of register used for input function arguments */
113
static inline int tcg_target_get_call_iarg_regs_count(int flags)
114
{
115
    if (TCG_TARGET_REG_BITS == 64) {
116
        return 6;
117
    }
118

    
119
    return 0;
120
}
121

    
122
/* parse target specific constraints */
/* Decode one constraint letter from *PCT_STR into CT, advancing the
   string on success.  Returns 0 on success, -1 on an unknown letter.  */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    /* 'a'..'D': pin the operand to one specific register.  */
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    /* 'q': any register with an addressable low byte -- all 16 on
       x86-64, only %eax..%ebx on i386.  */
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    /* 'Q': the first four registers only.  */
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    /* 'r': any general register.  */
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    /* 'L': any register except those occupied by the qemu_ld/st
       helper call arguments (cf. tcg_target_call_iarg_regs).  */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    /* 'e': sign-extended 32-bit constant; 'Z': zero-extended.  */
    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}
202

    
203
/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        /* Any constant is acceptable.  */
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        /* Fits a sign-extended 32-bit immediate ('e' constraint).  */
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        /* Fits a zero-extended 32-bit immediate ('Z' constraint).  */
        return 1;
    }
    return 0;
}
219

    
220
#if TCG_TARGET_REG_BITS == 64
/* Only the low 3 bits of a register number are encoded in the opcode
   or ModRM byte; the 4th bit travels in the REX prefix.  */
# define LOWREGMASK(x)        ((x) & 7)
#else
# define LOWREGMASK(x)        (x)
#endif

/* Prefix flags, ORed into the opcode value and peeled off again by
   tcg_out_opc when the instruction is emitted.  */
#define P_EXT                0x100                /* 0x0f opcode prefix */
#define P_DATA16        0x200                /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32        0x400                /* 0x67 opcode prefix */
# define P_REXW                0x800                /* Set REX.W = 1 */
# define P_REXB_R        0x1000                /* REG field as byte register */
# define P_REXB_RM        0x2000                /* R/M field as byte register */
#else
# define P_ADDR32        0
# define P_REXW                0
# define P_REXB_R        0
# define P_REXB_RM        0
#endif

/* Opcode bytes, named per the AMD/Intel manuals' operand notation
   (Gv = reg field, Ev = r/m field, Iz/Ib = immediate).  */
#define OPC_ARITH_EvIz        (0x81)
#define OPC_ARITH_EvIb        (0x83)
#define OPC_ARITH_GvEv        (0x03)                /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv        (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP        (0xc8 | P_EXT)
#define OPC_CALL_Jz        (0xe8)
#define OPC_CMP_GvEv        (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32        (0x48)
#define OPC_IMUL_GvEv        (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb        (0x6b)
#define OPC_IMUL_GvEvIz        (0x69)
#define OPC_INC_r32        (0x40)
#define OPC_JCC_long        (0x80 | P_EXT)        /* ... plus condition code */
#define OPC_JCC_short        (0x70)                /* ... plus condition code */
#define OPC_JMP_long        (0xe9)
#define OPC_JMP_short        (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv        (0x88)                /* stores, more or less */
#define OPC_MOVL_EvGv        (0x89)                /* stores, more or less */
#define OPC_MOVL_GvEv        (0x8b)                /* loads, more or less */
#define OPC_MOVL_EvIz        (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL        (0xbe | P_EXT)
#define OPC_MOVSWL        (0xbf | P_EXT)
#define OPC_MOVSLQ        (0x63 | P_REXW)
#define OPC_MOVZBL        (0xb6 | P_EXT)
#define OPC_MOVZWL        (0xb7 | P_EXT)
#define OPC_POP_r32        (0x58)
#define OPC_PUSH_r32        (0x50)
#define OPC_PUSH_Iv        (0x68)
#define OPC_PUSH_Ib        (0x6a)
#define OPC_RET                (0xc3)
#define OPC_SETCC        (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1        (0xd1)
#define OPC_SHIFT_Ib        (0xc1)
#define OPC_SHIFT_cl        (0xd3)
#define OPC_TESTL        (0x85)
#define OPC_XCHG_ax_r32        (0x90)

#define OPC_GRP3_Ev        (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev        0
#define EXT5_DEC_Ev        1
#define EXT5_CALLN_Ev        2
#define EXT5_JMPN_Ev        4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

/* Map TCG comparison conditions onto x86 condition codes.  */
static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
345

    
346
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefixes and opcode byte(s) for OPC.  R, RM and X are the
   full (0-15) register numbers destined for the ModRM reg, ModRM r/m
   and SIB index fields; only their high bits are consumed here, to
   build the REX prefix.  The ModRM/SIB bytes themselves are emitted
   by the callers.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;                /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;                /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
/* 32-bit variant: only the 0x66 and 0x0f prefixes exist.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
399

    
400
/* Emit OPC followed by a register-direct (mod = 11) ModRM byte
   pairing register R with register RM.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    int modrm = 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm);

    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, modrm);
}
405

    
406
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* mod=00, r/m=101 selects rip+disp32 in 64-bit mode.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* mod=00, r/m=100 (SIB), then base=101 index=100:
                   disp32 with no base and no index.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    /* Emit the displacement chosen above, if any.  */
    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
493

    
494
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    /* index = -1 means "no index register".  */
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
500

    
501
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
502
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
503
{
504
    /* Propagate an opcode prefix, such as P_REXW.  */
505
    int ext = subop & ~0x7;
506
    subop &= 0x7;
507

    
508
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
509
}
510

    
511
/* Register-to-register move; a no-op when source and destination
   coincide.  */
static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    int rexw;

    if (ret == arg) {
        return;
    }
    rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
}
519

    
520
/* Load constant ARG into register RET using the shortest suitable
   encoding.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    if (arg == 0) {
        /* xor r,r is shorter than mov r,$0.  NB: this clobbers flags.  */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* A 32-bit mov zero-extends, so it covers any unsigned 32-bit
           value and everything of 32-bit type.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        /* Sign-extended 32-bit immediate with REX.W.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        /* Full 64-bit immediate.  The double shift avoids a shift by
           32 when tcg_target_long is only 32 bits wide.  */
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
538

    
539
/* Push immediate VAL, using the shortest encoding that fits.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
        return;
    }
    if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
        return;
    }
    /* There is no push of a 64-bit immediate.  */
    tcg_abort();
}
551

    
552
/* push REG -- register number encoded directly in the opcode byte.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    int opc = OPC_PUSH_r32 + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
556

    
557
/* pop REG -- register number encoded directly in the opcode byte.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    int opc = OPC_POP_r32 + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
561

    
562
/* Load register RET from memory at ARG1 + ARG2.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, tcg_target_long arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);

    tcg_out_modrm_offset(s, OPC_MOVL_GvEv + rexw, ret, arg1, arg2);
}
568

    
569
/* Store register ARG to memory at ARG1 + ARG2.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, tcg_target_long arg2)
{
    int rexw = (type == TCG_TYPE_I64 ? P_REXW : 0);

    tcg_out_modrm_offset(s, OPC_MOVL_EvGv + rexw, arg, arg1, arg2);
}
575

    
576
/* Shift or rotate REG by the constant COUNT.  SUBOPC is a SHIFT_*
   selector, possibly ORed with a prefix such as P_DATA16.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    int prefix = subopc & ~0x7;
    int op = subopc & 0x7;

    if (count != 1) {
        /* shift r/m, imm8 */
        tcg_out_modrm(s, OPC_SHIFT_Ib + prefix, op, reg);
        tcg_out8(s, count);
    } else {
        /* shift r/m, 1 -- one byte shorter */
        tcg_out_modrm(s, OPC_SHIFT_1 + prefix, op, reg);
    }
}
589

    
590
/* bswap r32 -- register number encoded in the opcode byte.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    int opc = OPC_BSWAP + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
594

    
595
/* rolw $8, reg -- byte-swap the low 16 bits of REG.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
599

    
600
/* Zero-extend the low byte of SRC into DEST.  On 32-bit hosts only
   %eax..%ebx have an addressable low byte, hence the assert.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
606

    
607
/* Sign-extend the low byte of SRC into DEST; REXW selects a 64-bit
   destination.  On 32-bit hosts only %eax..%ebx have a low byte.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
613

    
614
/* Zero-extend the low 16 bits of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
619

    
620
/* Sign-extend the low 16 bits of SRC into DEST; REXW selects a
   64-bit destination.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
625

    
626
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
631

    
632
/* movslq -- sign-extend the low 32 bits of SRC into 64-bit DEST.  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
636

    
637
/* bswap r64 -- as bswap32 but with a REX.W prefix.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    int opc = OPC_BSWAP + P_REXW + LOWREGMASK(reg);

    tcg_out_opc(s, opc, 0, reg, 0);
}
641

    
642
/* Apply the arithmetic operation C (an ARITH_* code, optionally ORed
   with a prefix such as P_REXW) to register R0 with immediate VAL.
   A nonzero CF requests exact flag results, disabling the INC/DEC
   shortcut (INC/DEC do not update the carry flag).  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        /* Masks that match a zero/sign-extension insn get the shorter
           movz encoding instead of and-with-immediate.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    /* Prefer the sign-extended 8-bit immediate form when it fits.  */
    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* A 64-bit immediate has no direct encoding.  */
    tcg_abort();
}
702

    
703
/* Add constant VAL to REG; a zero addend emits nothing.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val == 0) {
        return;
    }
    tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
}
709

    
710
/* Use SMALL != 0 to force a short forward branch.  */
/* Emit a jump to LABEL_INDEX.  OPC is a JCC_* condition code, or
   JCC_JMP (-1) for an unconditional jump.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        /* Target already known (backward branch): pick the shortest
           encoding that reaches it.  */
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;                /* relative to end of 2-byte insn */
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                /* Caller demanded a short branch but it doesn't reach. */
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);        /* 5-byte jmp rel32 */
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);        /* 6-byte 0f 8x rel32 */
            }
        }
    } else if (small) {
        /* Forward short branch: record a reloc for the 8-bit disp.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        /* Forward long branch: record a reloc for the 32-bit disp.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
756

    
757
/* Set the condition flags for a comparison of ARG1 against ARG2
   (a register, or a constant when CONST_ARG2 is nonzero).  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (!const_arg2) {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    } else if (arg2 == 0) {
        /* test r,r sets the same flags as cmp r,$0 and is shorter.  */
        tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
    } else {
        tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
    }
}
771

    
772
/* Compare ARG1 with ARG2 as 32-bit values and branch to LABEL_INDEX
   when COND holds.  SMALL forces a short branch encoding.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
779

    
780
#if TCG_TARGET_REG_BITS == 64
/* Compare ARG1 with ARG2 as 64-bit values and branch to LABEL_INDEX
   when COND holds.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
/* 64-bit comparison on a 32-bit host.  args[0]/args[1] are the
   low/high words of operand A, args[2]/args[3] those of operand B,
   args[4] the condition and args[5] the branch label.  Inequalities
   compare the high words first, falling through to an unsigned
   comparison of the low words when the high words are equal.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif
871

    
872
/* Materialize (ARG1 COND ARG2) as 0/1 in DEST, 32-bit operands.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    int jcc = tcg_cond_to_jcc[cond];

    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    /* setcc writes only the low byte; zero-extend to the full word.  */
    tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
879

    
880
#if TCG_TARGET_REG_BITS == 64
881
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
882
                              TCGArg arg1, TCGArg arg2, int const_arg2)
883
{
884
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
885
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
886
    tcg_out_ext8u(s, dest, dest);
887
}
888
#else
889
/* Emit setcond2 for a 32-bit host: compare the 64-bit pair
   (args[2],args[3]) against (args[4],args[5]) (low word first in each
   pair) and set args[0] to 0 or 1.  Implemented on top of
   tcg_out_brcond2 by building a shifted argument vector.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    /* brcond2 takes its operands at indices 0..4 plus a label at 5;
       drop our destination (args[0]) and append the label slot.  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        /* Branch to label_true when the condition holds ...  */
        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        /* ... otherwise store 0 and skip the true case.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        /* Invert the condition and branch over the increment.  */
        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
930
#endif
931

    
932
/* Emit a direct CALL (call != 0) or JMP (call == 0) to DEST.
   Uses the 5-byte rel32 form when the displacement fits in 32 bits;
   otherwise loads DEST into R10 and does an indirect call/jump.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    /* Displacement is relative to the end of the 5-byte instruction.  */
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* Out of rel32 range (64-bit host): go through R10, which is
           call-clobbered and not used for argument passing.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}
945

    
946
/* Emit a call to the absolute address DEST.  */
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}
950

    
951
/* Emit a jump to the absolute address DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}
955

    
956
#if defined(CONFIG_SOFTMMU)
957

    
958
#include "../../softmmu_defs.h"
959

    
960
/* Slow-path load helpers, indexed by log2 of the access size.  */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};
966

    
967
/* Slow-path store helpers, indexed by log2 of the access size.  */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
973

    
974
/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDR_LOW_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    /* Guest addresses are 64-bit only when both host and guest are.  */
    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    /* Two working copies of the guest address: r1 becomes the TLB index,
       r0 becomes the page-aligned comparator.  */
    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Keep the low access-size bits in r0 so an unaligned access that
       would cross a page fails the compare and takes the slow path.  */
    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r1 = &env->tlb_table[mem_index][index].<which>  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    /* Reload the unmasked guest address into the first argument register;
       flags from the compare are still live.  */
    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 -- 8-bit displacement patched in by the caller.  */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* 64-bit guest on 32-bit host: also compare the address high part.
           cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* Turn the guest address into a host address:
       add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
1053
#endif
1054

    
1055
/* Emit the actual memory load from BASE+OFS into DATALO (and DATAHI for
   64-bit values on a 32-bit host).  SIZEOP encodes log2 of the size in
   bits 0-1 and sign-extension in bit 2.  Byte-swaps when the guest is
   big-endian (host is x86, little-endian).  */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            /* No single byte-swapping sign-extending load: zero-extend,
               swap, then sign-extend in register.  */
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load the 64-bit value as two words.  When
               byte-swapping, the halves also swap places.  */
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Load the word that does not overwrite BASE first, so the
               second load still has a valid base register.  */
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
1131

    
1132
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX. It will be useful once fixed registers globals are less
   common. */
/* Emit a guest memory load.  OPC bits 0-1 are log2 of the access size,
   bit 2 requests sign extension.  With CONFIG_SOFTMMU this emits an
   inline TLB-hit fast path plus a helper-call slow path; otherwise it
   loads directly, offset by GUEST_BASE.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
#if TCG_TARGET_REG_BITS == 64
    int arg_idx;
#else
    int stack_adjust;
#endif
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    /* A 64-bit load on a 32-bit host produces a register pair, shifting
       the address operands up by one slot.  */
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  The first call-argument register now holds the host
       address (set up by tcg_out_tlb_load).  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the short-jump displacements emitted by
       tcg_out_tlb_load to land here.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    /* i386 calling convention: push args right-to-left on the stack.  */
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#else
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

#if TCG_TARGET_REG_BITS == 32
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }
#endif

    /* The helper returns in EAX (EDX:EAX for 64-bit on 32-bit host);
       move/extend into the destination register(s).  */
    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* Destination low word collides with the returned high word:
               xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: patch the fast-path jump over the slow path.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        /* User-mode: direct access at guest address + GUEST_BASE.
           NOTE(review): offset is int32_t, so `offset != GUEST_BASE`
           detects a GUEST_BASE that does not fit in a signed 32-bit
           displacement -- it is not a tautology.  */
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1271

    
1272
/* Emit the actual memory store of DATALO (and DATAHI for 64-bit values
   on a 32-bit host) to BASE+OFS.  SIZEOP bits 0-1 give log2 of the
   size.  Byte-swaps through a scratch register when the guest is
   big-endian.  */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        /* 16-bit store via the operand-size prefix.  */
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, big-endian guest: store swapped halves in
               swapped order.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}
1330

    
1331
/* Emit a guest memory store.  OPC is log2 of the access size.  With
   CONFIG_SOFTMMU this emits an inline TLB-hit fast path plus a
   helper-call slow path; otherwise it stores directly, offset by
   GUEST_BASE.  Mirrors tcg_out_qemu_ld.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    /* A 64-bit store on a 32-bit host consumes a register pair, shifting
       the address operands up by one slot.  */
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  The first call-argument register holds the host
       address computed by tcg_out_tlb_load.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the short-jump displacements from the TLB compare.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    /* i386: push helper arguments right-to-left.  */
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (opc == 3) {
        tcg_out_push(s, data_reg2);
        stack_adjust += 4;
    }
    tcg_out_push(s, data_reg);
    stack_adjust += 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#else
    /* x86-64: address is already in the first argument register; load
       the value and mem_index into the second and third.  */
    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                TCG_REG_RSI, data_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
    stack_adjust = 0;
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* label2: patch the fast-path jump over the slow path.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        /* User-mode: direct access at guest address + GUEST_BASE.
           NOTE(review): offset is int32_t, so `offset != GUEST_BASE`
           detects a GUEST_BASE that does not fit in a signed 32-bit
           displacement -- it is not a tautology.  */
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1430

    
1431
/* Central code generator: translate one TCG opcode OPC with operands
   ARGS (CONST_ARGS flags which operands are immediates) into host x86
   instructions.  The OP_32_64 macro folds each _i64 case into its _i32
   twin, setting the REX.W prefix for the 64-bit variant.  */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        /* Return value in EAX, then jump back to the epilogue.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            /* Record the displacement offset so the jump can be
               retargeted when the TBs are chained.  */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                /* Fold the immediate into the LEA displacement.  */
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                /* Small constant: use the shorter imm8 encoding.  */
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            /* Variable count is constrained to CL by the 'c' constraint.  */
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    /* qemu_ld/st size codes: bits 0-1 log2 size, bit 2 sign-extend.  */
    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        /* MUL: EDX:EAX = EAX * args[3]; constraints fix the outputs.  */
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        /* Double-word add: low ADD then high ADC to propagate carry.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        /* Double-word subtract: low SUB then high SBB to propagate borrow.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        /* Only the byte/word insertions expressible as partial-register
           moves are supported; the constraints guarantee these cases.  */
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 -- args[0] + 4 encodes the high-byte
               register (%ah..%bh) of args[0].  */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1771

    
1772
/* Operand-constraint table: one entry per TCG opcode supported by this
   backend, giving the constraint string for each operand that the register
   allocator must satisfy.  Terminated by the { -1 } sentinel.
   NOTE(review): the meaning of the constraint letters ("r", "q", "Q", "L",
   "a"/"d"/"c", "0"/"1", "i", "e", "Z", ...) is defined by the constraint
   parser elsewhere in this file -- not visible here; verify against
   target_parse_constraint before relying on the groupings below.  */
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* 32-bit arithmetic; the two-operand x86 forms tie the destination
       to the first source ("0").  */
    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    /* 32-bit shifts and rotates.  */
    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    /* 8-bit extensions need a byte-addressable source register ("q").  */
    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },

#if TCG_TARGET_REG_BITS == 32
    /* Double-word helpers needed only on a 32-bit host.  */
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    /* Native 64-bit opcodes on an x86-64 host.  */
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    /* 64-bit arithmetic ("e": immediate usable in 64-bit ops --
       presumably sign-extended 32-bit; confirm against the parser).  */
    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    /* 64-bit shifts and rotates.  */
    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
#endif

    /* Guest memory accesses; the operand count depends on how many host
       registers a guest address/value needs.  */
#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    /* 32-bit host, guest address fits in one register; 64-bit values
       take a register pair.  */
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    /* 32-bit host with a 64-bit guest address: address takes two
       registers as well.  */
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};
static int tcg_target_callee_save_regs[] = {
1920
#if TCG_TARGET_REG_BITS == 64
1921
    TCG_REG_RBP,
1922
    TCG_REG_RBX,
1923
    TCG_REG_R12,
1924
    TCG_REG_R13,
1925
    TCG_REG_R14, /* Currently used for the global env. */
1926
    TCG_REG_R15,
1927
#else
1928
    TCG_REG_EBP, /* Currently used for the global env. */
1929
    TCG_REG_EBX,
1930
    TCG_REG_ESI,
1931
    TCG_REG_EDI,
1932
#endif
1933
};
/* Generate global QEMU prologue and epilogue code */
1936
static void tcg_target_qemu_prologue(TCGContext *s)
1937
{
1938
    int i, frame_size, push_size, stack_addend;
1939

    
1940
    /* TB prologue */
1941

    
1942
    /* Reserve some stack space, also for TCG temps.  */
1943
    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
1944
    push_size *= TCG_TARGET_REG_BITS / 8;
1945

    
1946
    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
1947
        CPU_TEMP_BUF_NLONGS * sizeof(long);
1948
    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
1949
        ~(TCG_TARGET_STACK_ALIGN - 1);
1950
    stack_addend = frame_size - push_size;
1951
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
1952
                  CPU_TEMP_BUF_NLONGS * sizeof(long));
1953

    
1954
    /* Save all callee saved registers.  */
1955
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
1956
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
1957
    }
1958

    
1959
#if TCG_TARGET_REG_BITS == 32
1960
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
1961
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
1962
    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
1963
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
1964
#else
1965
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1966
#endif
1967
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
1968

    
1969
    /* jmp *tb.  */
1970
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
1971

    
1972
    /* TB epilogue */
1973
    tb_ret_addr = s->code_ptr;
1974

    
1975
    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
1976

    
1977
    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
1978
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
1979
    }
1980
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
1981
}
/* One-time backend initialization: publish which host registers are
 * available per type, which are clobbered by calls, which are reserved,
 * and register the opcode constraint table.  */
static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* Fail safe: generated TLB-lookup code assumes a CPUTLBEntry is
       exactly (1 << CPU_TLB_ENTRY_BITS) bytes.  */
    if (sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)) {
        tcg_abort();
    }
#endif

    /* Registers usable for each value type: all 16 on x86-64, the 8
       32-bit registers otherwise.  */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    /* Registers clobbered across a host function call.  */
    tcg_regset_clear(tcg_target_call_clobber_regs);
    {
        static const int clobbered[] = {
            TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX,
        };
        int i;
        for (i = 0; i < ARRAY_SIZE(clobbered); i++) {
            tcg_regset_set_reg(tcg_target_call_clobber_regs, clobbered[i]);
        }
    }
    if (TCG_TARGET_REG_BITS == 64) {
        static const int clobbered64[] = {
            TCG_REG_RDI, TCG_REG_RSI, TCG_REG_R8,
            TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
        };
        int i;
        for (i = 0; i < ARRAY_SIZE(clobbered64); i++) {
            tcg_regset_set_reg(tcg_target_call_clobber_regs, clobbered64[i]);
        }
    }

    /* The stack pointer is never available to the allocator.  */
    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}