/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Andrzej Zaborowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
#ifndef __ARM_ARCH
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
     || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# else
#  define __ARM_ARCH 4
# endif
#endif

static int arm_arch = __ARM_ARCH;

#if defined(__ARM_ARCH_5T__) \
    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
# define use_armv5t_instructions 1
#else
# define use_armv5t_instructions use_armv6_instructions
#endif

#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)

#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif
#ifdef CONFIG_GETAUXVAL
# include <sys/auxv.h>
#endif

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",
    "%r1",
    "%r2",
    "%r3",
    "%r4",
    "%r5",
    "%r6",
    "%r7",
    "%r8",
    "%r9",
    "%r10",
    "%r11",
    "%r12",
    "%r13",
    "%r14",
    "%pc",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R12,
    TCG_REG_R14,
};

static const int tcg_target_call_iarg_regs[4] = {
    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
};
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_R0, TCG_REG_R1
};

#define TCG_REG_TMP  TCG_REG_R12

static inline void reloc_abs32(void *code_ptr, intptr_t target)
{
    *(uint32_t *) code_ptr = target;
}

static inline void reloc_pc24(void *code_ptr, intptr_t target)
{
    uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2);

    *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
                             | (offset & 0xffffff);
}

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    switch (type) {
    case R_ARM_ABS32:
        reloc_abs32(code_ptr, value);
        break;

    case R_ARM_CALL:
    case R_ARM_JUMP24:
    default:
        tcg_abort();

    case R_ARM_PC24:
        reloc_pc24(code_ptr, value);
        break;
    }
}

#define TCG_CT_CONST_ARM  0x100
#define TCG_CT_CONST_INV  0x200
#define TCG_CT_CONST_NEG  0x400
#define TCG_CT_CONST_ZERO 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch (ct_str[0]) {
    case 'I':
        ct->ct |= TCG_CT_CONST_ARM;
        break;
    case 'K':
        ct->ct |= TCG_CT_CONST_INV;
        break;
    case 'N': /* The gcc constraint letter is L, already used here.  */
        ct->ct |= TCG_CT_CONST_NEG;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;

    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        break;

    /* qemu_ld address */
    case 'l':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* r0-r2,lr will be overwritten when reading the tlb entry,
           so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;
    case 'L':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* r1 is still needed to load data_reg or data_reg2,
           so don't use it. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#endif
        break;

    /* qemu_st address & data_reg */
    case 's':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
           and r0-r1 doing the byte swapping, so don't use these. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
#if defined(CONFIG_SOFTMMU)
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
#if TARGET_LONG_BITS == 64
        /* Avoid clashes with registers being used for helper args */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;

    return 0;
}

static inline uint32_t rotl(uint32_t val, int n)
{
  /* Mask the right shift count: n == 0 must not shift by 32, which
     would be undefined behaviour.  */
  return (val << n) | (val >> ((32 - n) & 31));
}

/* ARM immediates for ALU instructions are made of an unsigned 8-bit value
   right-rotated by an even amount between 0 and 30. */
static inline int encode_imm(uint32_t imm)
{
    int shift;

    /* simple case, only lower bits */
    if ((imm & ~0xff) == 0)
        return 0;
    /* then try a simple even shift */
    shift = ctz32(imm) & ~1;
    if (((imm >> shift) & ~0xff) == 0)
        return 32 - shift;
    /* now try harder with rotations */
    if ((rotl(imm, 2) & ~0xff) == 0)
        return 2;
    if ((rotl(imm, 4) & ~0xff) == 0)
        return 4;
    if ((rotl(imm, 6) & ~0xff) == 0)
        return 6;
    /* imm can't be encoded */
    return -1;
}
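
/* For example: encode_imm(0xff) == 0 (the value fits in the low byte);
   encode_imm(0x3fc) == 30, since rotl(0x3fc, 30) == 0xff;
   encode_imm(0xff000000) == 8; and encode_imm(0xf000000f) == 4, found by
   the rotation probes above.  A value such as 0x101 spans more than eight
   significant bits under every even rotation, so it yields -1.  */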

static inline int check_fit_imm(uint32_t imm)
{
    return encode_imm(imm) >= 0;
}

/* Test if a constant matches the constraint.
 * TODO: define constraints for:
 *
 * ldr/str offset:   between -0xfff and 0xfff
 * ldrh/strh offset: between -0xff and 0xff
 * mov operand2:     values represented with x << (2 * y), x < 0x100
 * add, sub, eor...: ditto
 */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct;
    ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else {
        return 0;
    }
}

#define TO_CPSR (1 << 20)

typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,
} ARMInsn;

#define SHIFT_IMM_LSL(im)        (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)        (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)        (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)        (((im) << 7) | 0x60)
#define SHIFT_REG_LSL(rs)        (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)        (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)        (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)        (((rs) << 8) | 0x70)
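
/* For instance, SHIFT_IMM_LSL(0) encodes the "no shift" operand used for
   plain register moves below, while SHIFT_IMM_ROR(16) == (16 << 7) | 0x60
   rotates the second operand right by 16 bits.  */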

enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,        /* Unsigned greater or equal */
    COND_CC = 0x3,        /* Unsigned less than */
    COND_MI = 0x4,        /* Negative */
    COND_PL = 0x5,        /* Zero or greater */
    COND_VS = 0x6,        /* Overflow */
    COND_VC = 0x7,        /* No overflow */
    COND_HI = 0x8,        /* Unsigned greater than */
    COND_LS = 0x9,        /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};

static const uint8_t tcg_cond_to_arm_cond[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_CC,
    [TCG_COND_GEU] = COND_CS,
    [TCG_COND_LEU] = COND_LS,
    [TCG_COND_GTU] = COND_HI,
};

static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
}

static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0a000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by skipping
       the corresponding bytes. This ensures that caches and memory are
       kept coherent during retranslation. */
    s->code_ptr += 3;
    tcg_out8(s, (cond << 4) | 0x0a);
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
    /* We take care here not to modify the branch target by skipping
       the corresponding bytes. This ensures that caches and memory are
       kept coherent during retranslation. */
    s->code_ptr += 3;
    tcg_out8(s, (cond << 4) | 0x0b);
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
    tcg_out32(s, (cond << 28) | 0x0b000000 |
                    (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
{
    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
}

static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
{
    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
                (((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_dat_reg(TCGContext *s,
                int cond, int opc, int rd, int rn, int rm, int shift)
{
    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                    (rn << 16) | (rd << 12) | shift | rm);
}

static inline void tcg_out_nop(TCGContext *s)
{
    if (use_armv7_instructions) {
        /* Architected nop introduced in v6k.  */
        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
           also Just So Happened to do nothing on pre-v6k so that we
           don't need to conditionalize it?  */
        tcg_out32(s, 0xe320f000);
    } else {
        /* Prior to that the assembler uses mov r0, r0.  */
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
{
    /* Simple reg-reg move, optimising out the 'do nothing' case */
    if (rd != rm) {
        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_dat_imm(TCGContext *s,
                int cond, int opc, int rd, int rn, int im)
{
    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                    (rn << 16) | (rd << 12) | im);
}

static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, opc, rn;

    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
       Speed things up by only checking when movt would be required.
       Prior to armv7, have one go at fully rotated immediates before
       doing the decomposition thing below.  */
    if (!use_armv7_instructions || (arg & 0xffff0000)) {
        rot = encode_imm(arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                            rotl(arg, rot) | (rot << 7));
            return;
        }
        rot = encode_imm(~arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                            rotl(~arg, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* TODO: This is very suboptimal, we can easily have a constant
       pool somewhere after all the instructions.  */
    opc = ARITH_MOV;
    rn = 0;
    /* If we have lots of leading 1's, we can shorten the sequence by
       beginning with mvn and then clearing higher bits with eor.  */
    if (clz32(~arg) > clz32(arg)) {
        opc = ARITH_MVN, arg = ~arg;
    }
    do {
        int i = ctz32(arg) & ~1;
        rot = ((32 - i) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
        arg &= ~(0xff << i);

        opc = ARITH_EOR;
        rn = rd;
    } while (arg);
}
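
/* To illustrate the fallback loop above: moving 0x00ff00ff on a pre-v7
   core emits "mov rd, #0x000000ff" followed by "eor rd, rd, #0x00ff0000",
   clearing one even-aligned byte of the remaining constant per insn.  */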

static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rI" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        assert(rot >= 0);
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
                            TCGReg dst, TCGReg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIK" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = ~rhs;
            rot = encode_imm(rhs);
            assert(rot >= 0);
            opc = opinv;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
                            TCGArg dst, TCGArg lhs, TCGArg rhs,
                            bool rhs_is_const)
{
    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
     * rhs must satisfy the "rIN" constraint.
     */
    if (rhs_is_const) {
        int rot = encode_imm(rhs);
        if (rot < 0) {
            rhs = -rhs;
            rot = encode_imm(rhs);
            assert(rot >= 0);
            opc = opneg;
        }
        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
    } else {
        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    }
}

static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
                                 TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
    if (!use_armv6_instructions && rd == rn) {
        if (rd == rm) {
            /* rd == rn == rm; copy an input to tmp first.  */
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rm = rn = TCG_REG_TMP;
        } else {
            rn = rm;
            rm = rd;
        }
    }
    /* mul */
    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
}

static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* umull */
    tcg_out32(s, (cond << 28) | 0x00800090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
                                   TCGReg rd1, TCGReg rn, TCGReg rm)
{
    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
        if (rd0 == rm || rd1 == rm) {
            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
            rn = TCG_REG_TMP;
        } else {
            TCGReg t = rn;
            rn = rm;
            rm = t;
        }
    }
    /* smull */
    tcg_out32(s, (cond << 28) | 0x00c00090 |
              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
}

static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
{
    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
}

static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                 int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxtb */
        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(24));
    }
}

static inline void tcg_out_ext8u(TCGContext *s, int cond,
                                 int rd, int rn)
{
    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}

static inline void tcg_out_ext16s(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* sxth */
        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_ASR(16));
    }
}

static inline void tcg_out_ext16u(TCGContext *s, int cond,
                                  int rd, int rn)
{
    if (use_armv6_instructions) {
        /* uxth */
        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_LSL(16));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rd, SHIFT_IMM_LSR(16));
    }
}

static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* revsh */
        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
    }
}

/* Swap the two low bytes, assuming that the two high input bytes and the
   two high output bytes can hold any value. */
static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev16 */
        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
        tcg_out_dat_reg(s, cond, ARITH_ORR,
                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
    if (use_armv6_instructions) {
        /* rev */
        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
        tcg_out_dat_imm(s, cond, ARITH_BIC,
                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
        tcg_out_dat_reg(s, cond, ARITH_MOV,
                        rd, 0, rn, SHIFT_IMM_ROR(8));
        tcg_out_dat_reg(s, cond, ARITH_EOR,
                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
    }
}

bool tcg_target_deposit_valid(int ofs, int len)
{
    /* ??? Without bfi, we could improve over generic code by combining
       the right-shift from a non-zero ofs with the orr.  We do run into
       problems when rd == rs, and the mask generated from ofs+len doesn't
       fit into an immediate.  We would have to be careful not to pessimize
       wrt the optimizations performed on the expanded code.  */
    return use_armv7_instructions;
}

static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
                                   TCGArg a1, int ofs, int len, bool const_a1)
{
    if (const_a1) {
        /* bfi becomes bfc with rn == 15.  */
        a1 = 15;
    }
    /* bfi/bfc */
    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
              | (ofs << 7) | ((ofs + len - 1) << 16));
}
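
/* E.g. depositing an 8-bit field at bit 8 sets the lsb field (bits [11:7])
   to 8 and the msb field (bits [20:16]) to 15 in the encoding above; with
   const_a1 the rn == 15 form is bfc, which simply clears the field.  */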

/* Note that this routine is used for both LDR and LDRH formats, so we do
   not wish to include an immediate shift at this point.  */
static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
{
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
              | (w << 21) | (rn << 16) | (rt << 12) | rm);
}

static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                            TCGReg rn, int imm8, bool p, bool w)
{
    bool u = 1;
    if (imm8 < 0) {
        imm8 = -imm8;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
}

static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
                             TCGReg rn, int imm12, bool p, bool w)
{
    bool u = 1;
    if (imm12 < 0) {
        imm12 = -imm12;
        u = 0;
    }
    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
              (rn << 16) | (rt << 12) | imm12);
}
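
/* In the memop helpers above, the p/u/w bits select the ARM addressing
   mode: p=1,w=0 is plain offset addressing [rn, #imm]; p=1,w=1 is
   pre-indexed with writeback [rn, #imm]!; u selects whether the offset is
   added (1) or subtracted (0).  The _rwb variants below use p=1,w=1.  */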

static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
}

/* Register pre-increment with base writeback.  */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
                                    TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
}

static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
                                   TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm12)
{
    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
}

static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
                                 TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, int imm8)
{
    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
}

static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
                                  TCGReg rn, TCGReg rm)
{
    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
}

static inline void tcg_out_ld32u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_st32(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st32_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16u_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld16s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld16s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st16(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st16_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8u(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8_12(s, cond, rd, rn, offset);
}

static inline void tcg_out_ld8s(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xff || offset < -0xff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_ld8s_8(s, cond, rd, rn, offset);
}

static inline void tcg_out_st8(TCGContext *s, int cond,
                int rd, int rn, int32_t offset)
{
    if (offset > 0xfff || offset < -0xfff) {
        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
    } else
        tcg_out_st8_12(s, cond, rd, rn, offset);
}
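
/* So, for instance, tcg_out_ld32u with offset 0x2000 (beyond ldr's 12-bit
   immediate) expands to a movi32 of the offset into TMP followed by
   "ldr rd, [rn, tmp]", while an in-range offset uses the one-insn form.  */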

/* The _goto case is normally between TBs within the same code buffer, and
 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which
 * does need it.
 */
static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
{
    int32_t disp = addr - (tcg_target_long) s->code_ptr;

    if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
        tcg_out_b(s, cond, disp);
        return;
    }

    tcg_out_movi32(s, cond, TCG_REG_TMP, addr);
    if (use_armv5t_instructions) {
        tcg_out_bx(s, cond, TCG_REG_TMP);
    } else {
        if (addr & 1) {
            tcg_abort();
        }
        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
    }
}

/* The call case is mostly used for helpers - so it's not unreasonable
 * for them to be beyond branch range */
static inline void tcg_out_call(TCGContext *s, uint32_t addr)
{
    int32_t val;

    val = addr - (tcg_target_long) s->code_ptr;
    if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) {
        if (addr & 1) {
            /* Use BLX if the target is in Thumb mode */
            if (!use_armv5t_instructions) {
                tcg_abort();
            }
            tcg_out_blx_imm(s, val);
        } else {
            tcg_out_bl(s, COND_AL, val);
        }
    } else if (use_armv7_instructions) {
        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr);
        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
    } else {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
        tcg_out32(s, addr);
    }
}
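
/* Note on the long-call sequence above: reading PC yields the address of
   the current insn plus 8, so "add lr, pc, #4" makes LR point just past
   the inline literal word, and "ldr pc, [pc, #-4]" fetches that literal
   as the branch target.  */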

static inline void tcg_out_callr(TCGContext *s, int cond, int arg)
{
    if (use_armv5t_instructions) {
        tcg_out_blx(s, cond, arg);
    } else {
        tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
                        TCG_REG_PC, SHIFT_IMM_LSL(0));
        tcg_out_bx(s, cond, arg);
    }
}

static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
{
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        tcg_out_goto(s, cond, l->u.value);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337);
        tcg_out_b_noaddr(s, cond);
    }
}

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[8] = {
    helper_ret_ldub_mmu,
    helper_ret_lduw_mmu,
    helper_ret_ldul_mmu,
    helper_ret_ldq_mmu,

    helper_ret_ldsb_mmu,
    helper_ret_ldsw_mmu,
    helper_ret_ldul_mmu,
    helper_ret_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[4] = {
    helper_ret_stb_mmu,
    helper_ret_stw_mmu,
    helper_ret_stl_mmu,
    helper_ret_stq_mmu,
};

/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers; 4 and up go on the stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}
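
/* Roughly, the tcg_out_arg_reg32 instance below therefore expands to:
 *
 *   static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg)
 *   {
 *       if (argreg < 4) {
 *           tcg_out_mov_reg(s, COND_AL, argreg, arg);
 *       } else {
 *           int ofs = (argreg - 4) * 4;
 *           assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);
 *           tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);
 *       }
 *       return argreg + 1;
 *   }
 */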

DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )

static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
                                TCGReg arglo, TCGReg arghi)
{
    /* 64 bit arguments must go in even/odd register pairs
     * and in 8-aligned stack slots.
     */
    if (argreg & 1) {
        argreg++;
    }
    argreg = tcg_out_arg_reg32(s, argreg, arglo);
    argreg = tcg_out_arg_reg32(s, argreg, arghi);
    return argreg;
}
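
/* E.g. a 64-bit value following one 32-bit argument starts at argreg == 1;
   the alignment bump above moves it to the r2:r3 pair, per the AAPCS
   even/odd register-pair rule, which is why the softmmu slow paths below
   pass a 64-bit guest address in r2:r3.  */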

#define TLB_SHIFT        (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* Load and compare a TLB entry, leaving the flags set.  Leaves R2 pointing
   to the tlb entry.  Clobbers R1 and TMP.  */

static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                             int s_bits, int tlb_offset)
{
    TCGReg base = TCG_AREG0;

    /* Should generate something like the following:
     * pre-v7:
     *   shr    tmp, addr_reg, #TARGET_PAGE_BITS                  (1)
     *   add    r2, env, #off & 0xff00
     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
     *   ldr    r0, [r2, #off & 0xff]!                            (4)
     *   tst    addr_reg, #s_mask
     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS                    (5)
     *
     * v7 (not implemented yet):
     *   ubfx   r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS    (1)
     *   movw   tmp, #~TARGET_PAGE_MASK & ~s_mask
     *   movw   r0, #off
     *   add    r2, env, r2, lsl #CPU_TLB_ENTRY_BITS              (2)
     *   bic    tmp, addr_reg, tmp
     *   ldr    r0, [r2, r0]!                                     (3)
     *   cmp    r0, tmp                                           (4)
     */
#  if CPU_TLB_BITS > 8
#   error
#  endif
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));

    /* We assume that the offset is contained within 16 bits.  */
    assert((tlb_offset & ~0xffff) == 0);
    if (tlb_offset > 0xff) {
        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                        (24 << 7) | (tlb_offset >> 8));
        tlb_offset &= 0xff;
        base = TCG_REG_R2;
    }

    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));

    /* Load the tlb comparator.  Use ldrd if needed and available,
       but due to how the pointer needs setting up, ldm isn't useful.
       Base arm5 doesn't have ldrd, but armv5te does.  */
    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0,
                        TCG_REG_R2, tlb_offset, 1, 1);
    } else {
        tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0,
                         TCG_REG_R2, tlb_offset, 1, 1);
        if (TARGET_LONG_BITS == 64) {
            tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1,
                             TCG_REG_R2, 4, 1, 0);
        }
    }

    /* Check alignment.  */
    if (s_bits) {
        tcg_out_dat_imm(s, COND_AL, ARITH_TST,
                        0, addrlo, (1 << s_bits) - 1);
    }

    tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));

    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
    }
}
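
/* As an example of the offset split above: tlb_offset == 0x1234 first emits
   "add r2, env, #0x1200" (the 0x12 byte rotated into place) and then folds
   the remaining 0x34 into the ldr/ldrd immediate.  */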

/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
                                int data_reg, int data_reg2, int addrlo_reg,
                                int addrhi_reg, int mem_index,
                                uint8_t *raddr, uint8_t *label_ptr)
{
    int idx;
    TCGLabelQemuLdst *label;

    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
        tcg_abort();
    }

    idx = s->nb_qemu_ldst_labels++;
    label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = data_reg;
    label->datahi_reg = data_reg2;
    label->addrlo_reg = addrlo_reg;
    label->addrhi_reg = addrhi_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, data_reg, data_reg2;
    int opc = lb->opc;
    uintptr_t func;

    reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);

    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = (uintptr_t)qemu_ld_helpers[opc & 3];
    } else {
        func = (uintptr_t)qemu_ld_helpers[opc];
        if (opc & 4) {
            opc = 2;
        }
    }
    tcg_out_call(s, func);

    data_reg = lb->datalo_reg;
    data_reg2 = lb->datahi_reg;
    switch (opc) {
    case 0 | 4:
        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
        break;
    case 3:
        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
        tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
        break;
    }

    tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, data_reg, data_reg2;

    reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);

    argreg = TCG_REG_R0;
    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }

    data_reg = lb->datalo_reg;
    data_reg2 = lb->datahi_reg;
    switch (lb->opc) {
    case 0:
        argreg = tcg_out_arg_reg8(s, argreg, data_reg);
        break;
    case 1:
        argreg = tcg_out_arg_reg16(s, argreg, data_reg);
        break;
    case 2:
        argreg = tcg_out_arg_reg32(s, argreg, data_reg);
        break;
    case 3:
        argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
        break;
    }

    argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* Tail-call to the helper, which will return to the fast path.  */
    tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[lb->opc & 3]);
}
#endif /* SOFTMMU */

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg, data_reg2;
    bool bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    TCGReg addr_reg2;
    uint8_t *label_ptr;
#endif
#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 1;
#else
    bswap = 0;
#endif

    data_reg = *args++;
    data_reg2 = (opc == 3 ? *args++ : 0);
    addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    mem_index = *args;
    s_bits = opc & 3;

    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));

    /* This is a conditional BL, used only to load a pointer within this
       opcode into LR for the slow path.  We will not be using the value
       for a tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
                    offsetof(CPUTLBEntry, addend)
                    - offsetof(CPUTLBEntry, addr_read));

    switch (opc) {
    case 0:
        tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 0 | 4:
        tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 1:
        tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
        } else {
            tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 3:
        if (bswap) {
            tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4);
            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
        } else if (use_armv6_instructions
                   && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
            tcg_out_ldrd_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
        } else {
            tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
            tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
        }
        break;
    }

    add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (GUEST_BASE) {
        uint32_t offset = GUEST_BASE;
        int i, rot;

        while (offset) {
            i = ctz32(offset) & ~1;
            rot = ((32 - i) << 7) & 0xf00;

            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addr_reg,
                            ((offset >> i) & 0xff) | rot);
            addr_reg = TCG_REG_TMP;
            offset &= ~(0xff << i);
        }
    }
    switch (opc) {
    case 0:
        tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 0 | 4:
        tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 1:
        tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
        } else {
            tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
        }
        break;
    case 2:
    default:
        tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
        }
        break;
    case 3:
        if (use_armv6_instructions && !bswap
            && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
            tcg_out_ldrd_8(s, COND_AL, data_reg, addr_reg, 0);
        } else if (use_armv6_instructions && bswap
                   && (data_reg2 & 1) == 0 && data_reg == data_reg2 + 1) {
            tcg_out_ldrd_8(s, COND_AL, data_reg2, addr_reg, 0);
        } else if (data_reg == addr_reg) {
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
        } else {
            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
        }
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
        }
        break;
    }
#endif
}
1500

    
1501
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg, data_reg2;
    bool bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    TCGReg addr_reg2;
    uint8_t *label_ptr;
#endif
#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 1;
#else
    bswap = 0;
#endif

    data_reg = *args++;
    data_reg2 = (opc == 3 ? *args++ : 0);
    addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    mem_index = *args;
    s_bits = opc & 3;

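    /* tcg_out_tlb_read leaves the condition flags set by the TLB
       comparison: EQ on a hit, so the fast-path stores below execute
       conditionally and a miss falls through to the slow-path call.  */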
    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
                     offsetof(CPUArchState,
                              tlb_table[mem_index][0].addr_write));

    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
                    offsetof(CPUTLBEntry, addend)
                    - offsetof(CPUTLBEntry, addr_write));

    switch (opc) {
    case 0:
        tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        break;
    case 1:
        if (bswap) {
            tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
            tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
        } else {
            tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        }
        break;
    case 2:
    default:
        if (bswap) {
            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
            tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
        } else {
            tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        }
        break;
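    /* 64-bit store: with bswap each half is staged through R0;
       otherwise an even/odd register pair can use a single STRD.  */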
    case 3:
        if (bswap) {
            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
            tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
            tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
        } else if (use_armv6_instructions
                   && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
            tcg_out_strd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
        } else {
            tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
            tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
        }
        break;
    }

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl_noaddr(s, COND_NE);

    add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (GUEST_BASE) {
        uint32_t offset = GUEST_BASE;
        int i, rot;
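
        /* Same rotated-immediate decomposition of GUEST_BASE as in
           tcg_out_qemu_ld, with R1 free to serve as the scratch base.  */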
        while (offset) {
            i = ctz32(offset) & ~1;
            rot = ((32 - i) << 7) & 0xf00;

            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg,
                            ((offset >> i) & 0xff) | rot);
            addr_reg = TCG_REG_R1;
            offset &= ~(0xff << i);
        }
    }
    switch (opc) {
    case 0:
        tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0);
        break;
    case 1:
        if (bswap) {
            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
        } else {
            tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
        }
        break;
    case 2:
    default:
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
        }
        break;
    case 3:
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
        } else if (use_armv6_instructions
                   && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
            tcg_out_strd_8(s, COND_AL, data_reg, addr_reg, 0);
        } else {
            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
            tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
        }
        break;
    }
#endif
}

static uint8_t *tb_ret_addr;

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2, a3, a4, a5;
    int c;

    switch (opc) {
    case INDEX_op_exit_tb:
        if (use_armv7_instructions || check_fit_imm(args[0])) {
            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
            tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
        } else {
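            /* The return value doesn't encode as an immediate: load it
               from an inline literal emitted after the branch, then
               back-patch the LDR's pc-relative offset.  */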
            uint8_t *ld_ptr = s->code_ptr;
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
            tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
            *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
            tcg_out32(s, args[0]);
        }
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* Direct jump method */
#if defined(USE_DIRECT_JUMP)
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out_b_noaddr(s, COND_AL);
#else
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
#endif
        } else {
            /* Indirect jump method */
#if 1
            c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8);
            if (c > 0xfff || c < -0xfff) {
                tcg_out_movi32(s, COND_AL, TCG_REG_R0,
                                (tcg_target_long) (s->tb_next + args[0]));
                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
            } else {
                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
            }
#else
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
            tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
#endif
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_call(s, args[0]);
        } else {
            tcg_out_callr(s, COND_AL, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_goto_label(s, COND_AL, args[0]);
        break;

    case INDEX_op_ld8u_i32:
        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mov_i32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
                        args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi32(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_movcond_i32:
        /* Constraints mean that v2 is always in the same register as dest,
         * so we only need to do "if condition passed, move v1 to dest".
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
        break;
    case INDEX_op_add_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_sub_i32:
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
            } else {
                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
                               args[0], args[2], args[1], 1);
            }
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
                            args[0], args[1], args[2], const_args[2]);
        }
        break;
    case INDEX_op_and_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_andc_i32:
        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
                        args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_or_i32:
        c = ARITH_ORR;
        goto gen_arith;
    case INDEX_op_xor_i32:
        c = ARITH_EOR;
        /* Fall through.  */
    gen_arith:
        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
        break;
    case INDEX_op_add2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
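        /* If the low-part output would overwrite a high-part input
           before it is consumed, compute the low word into TCG_REG_TMP
           and copy it to the real destination afterwards.  */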
        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
                        a0, a2, a4, const_args[4]);
        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
                        a1, a3, a5, const_args[5]);
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_sub2_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        a3 = args[3], a4 = args[4], a5 = args[5];
        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
            a0 = TCG_REG_TMP;
        }
        if (const_args[2]) {
            if (const_args[4]) {
                tcg_out_movi32(s, COND_AL, a0, a4);
                a4 = a0;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
        } else {
            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
        }
        if (const_args[3]) {
            if (const_args[5]) {
                tcg_out_movi32(s, COND_AL, a1, a5);
                a5 = a1;
            }
            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
        } else {
            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
                            a1, a3, a5, const_args[5]);
        }
        tcg_out_mov_reg(s, COND_AL, args[0], a0);
        break;
    case INDEX_op_neg_i32:
        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
        break;
    case INDEX_op_not_i32:
        tcg_out_dat_reg(s, COND_AL,
                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
        break;
    case INDEX_op_mul_i32:
        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_muls2_i32:
        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    /* XXX: Perhaps args[2] & 0x1f is wrong */
    case INDEX_op_shl_i32:
        c = const_args[2] ?
                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
        goto gen_shift32;
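    /* For LSR/ASR an immediate count of 0 encodes a shift by 32, and
       for ROR it encodes RRX, so a zero count is emitted as LSL #0.  */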
    case INDEX_op_shr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
        goto gen_shift32;
    case INDEX_op_sar_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
        goto gen_shift32;
    case INDEX_op_rotr_i32:
        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
        /* Fall through.  */
    gen_shift32:
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
        break;

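    /* ARM has no rotate-left: emit rotl(x, n) as ror(x, 32 - n).  */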
    case INDEX_op_rotl_i32:
        if (const_args[2]) {
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            ((0x20 - args[2]) & 0x1f) ?
                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                            SHIFT_IMM_LSL(0));
        } else {
            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
                            SHIFT_REG_ROR(TCG_REG_TMP));
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
        break;
    case INDEX_op_brcond2_i32:
        /* The resulting conditions are:
         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[2], const_args[2]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;
    case INDEX_op_setcond2_i32:
        /* See brcond2_i32 comment */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[2], args[4], const_args[4]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
                        ARITH_MOV, args[0], 0, 0);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    default:
        tcg_abort();
    }
}

#ifdef CONFIG_SOFTMMU
/* Generate TB finalization at the end of block.  */
void tcg_out_tb_finalize(TCGContext *s)
{
    int i;
    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
        TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
        if (label->is_ld) {
            tcg_out_qemu_ld_slow_path(s, label);
        } else {
            tcg_out_qemu_st_slow_path(s, label);
        }
    }
}
#endif /* SOFTMMU */

static const TCGTargetOpDef arm_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },

    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* TODO: "r", "r", "ri" */
    { INDEX_op_add_i32, { "r", "r", "rIN" } },
    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rIK" } },
    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
    { INDEX_op_or_i32, { "r", "r", "rI" } },
    { INDEX_op_xor_i32, { "r", "r", "rI" } },
    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "rIN" } },
    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },

    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },

#if TARGET_LONG_BITS == 32
    { INDEX_op_qemu_ld8u, { "r", "l" } },
    { INDEX_op_qemu_ld8s, { "r", "l" } },
    { INDEX_op_qemu_ld16u, { "r", "l" } },
    { INDEX_op_qemu_ld16s, { "r", "l" } },
    { INDEX_op_qemu_ld32, { "r", "l" } },
    { INDEX_op_qemu_ld64, { "L", "L", "l" } },

    { INDEX_op_qemu_st8, { "s", "s" } },
    { INDEX_op_qemu_st16, { "s", "s" } },
    { INDEX_op_qemu_st32, { "s", "s" } },
    { INDEX_op_qemu_st64, { "s", "s", "s" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
    { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
    { INDEX_op_qemu_ld16u, { "r", "l", "l" } },
    { INDEX_op_qemu_ld16s, { "r", "l", "l" } },
    { INDEX_op_qemu_ld32, { "r", "l", "l" } },
    { INDEX_op_qemu_ld64, { "L", "L", "l", "l" } },

    { INDEX_op_qemu_st8, { "s", "s", "s" } },
    { INDEX_op_qemu_st16, { "s", "s", "s" } },
    { INDEX_op_qemu_st32, { "s", "s", "s" } },
    { INDEX_op_qemu_st64, { "s", "s", "s", "s" } },
#endif

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },

    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
#if defined(CONFIG_GETAUXVAL)
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
# if !defined(use_idiv_instructions)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
# endif
    if (__ARM_ARCH < 7) {
        const char *pl = (const char *)getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }
#endif /* GETAUXVAL */

    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_R0) |
                     (1 << TCG_REG_R1) |
                     (1 << TCG_REG_R2) |
                     (1 << TCG_REG_R3) |
                     (1 << TCG_REG_R12) |
                     (1 << TCG_REG_R14));
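
    /* SP, PC and the designated scratch register are never handed to
       the register allocator.  */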
    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);

    tcg_add_target_add_op_defs(arm_op_defs);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

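/* Nine words pushed by the prologue: r4-r11 plus lr.  */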
#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
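
    /* Entered as fn(env, tb_ptr): env has just been copied into AREG0,
       and the bx below jumps into the translated block.  */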
    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
    tb_ret_addr = s->code_ptr;

    /* Epilogue.  We branch here via tb_ret_addr.  */
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } */
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
}

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 14,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    debug_frame.fde.func_start = (tcg_target_long) buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}