Statistics
| Branch: | Revision:

root / target-arm / translate-a64.c @ 6d9571f7

History | View | Annotate | Download (216.7 kB)

1
/*
2
 *  AArch64 translation
3
 *
4
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include <stdarg.h>
20
#include <stdlib.h>
21
#include <stdio.h>
22
#include <string.h>
23
#include <inttypes.h>
24

    
25
#include "cpu.h"
26
#include "tcg-op.h"
27
#include "qemu/log.h"
28
#include "translate.h"
29
#include "qemu/host-utils.h"
30

    
31
#include "exec/gen-icount.h"
32

    
33
#include "helper.h"
34
#define GEN_HELPER 1
35
#include "helper.h"
36

    
37
/* TCG globals backed by CPUARMState fields (registered in
 * a64_translate_init() below).
 */
static TCGv_i64 cpu_X[32];   /* X0..X30 plus SP in slot 31 (see regnames) */
static TCGv_i64 cpu_pc;      /* program counter */
static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;  /* condition flags */

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_addr;
static TCGv_i64 cpu_exclusive_val;
static TCGv_i64 cpu_exclusive_high;
#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif
49

    
50
/* Debug names for the 32 X registers; note slot 30 is the link
 * register and slot 31 the stack pointer (see the register-mapping
 * comment near cpu_reg() below).
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
56

    
57
/* Shift operation selectors; the values match the 2-bit shift-type
 * field used by the shifted-register instruction forms.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
63

    
64
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;           /* value required of (insn & mask) */
    uint32_t mask;              /* significant bits; 0 terminates a table */
    AArch64DecodeFn *disas_fn;  /* handler run when the pattern matches */
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
78

    
79
/* initialize TCG globals.
 * Registers each of the TCG global variables above against its
 * backing CPUARMState field; called once at startup.
 */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    /* All 32 slots including xregs[31] (SP) */
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    /* Condition flags */
    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");

    /* Load/store exclusive state */
    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
    cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif
}
111

    
112
void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
113
                            fprintf_function cpu_fprintf, int flags)
114
{
115
    ARMCPU *cpu = ARM_CPU(cs);
116
    CPUARMState *env = &cpu->env;
117
    uint32_t psr = pstate_read(env);
118
    int i;
119

    
120
    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
121
            env->pc, env->xregs[31]);
122
    for (i = 0; i < 31; i++) {
123
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
124
        if ((i % 4) == 3) {
125
            cpu_fprintf(f, "\n");
126
        } else {
127
            cpu_fprintf(f, " ");
128
        }
129
    }
130
    cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
131
                psr,
132
                psr & PSTATE_N ? 'N' : '-',
133
                psr & PSTATE_Z ? 'Z' : '-',
134
                psr & PSTATE_C ? 'C' : '-',
135
                psr & PSTATE_V ? 'V' : '-');
136
    cpu_fprintf(f, "\n");
137

    
138
    if (flags & CPU_DUMP_FPU) {
139
        int numvfpregs = 32;
140
        for (i = 0; i < numvfpregs; i += 2) {
141
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
142
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
143
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
144
                        i, vhi, vlo);
145
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
146
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
147
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
148
                        i + 1, vhi, vlo);
149
        }
150
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
151
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
152
    }
153
}
154

    
155
/* Return the MMU index to use for guest memory accesses emitted
 * from the current translation context.
 */
static int get_mem_index(DisasContext *s)
{
#ifdef CONFIG_USER_ONLY
    /* user-mode emulation has only the user address space */
    return 1;
#else
    return s->user;
#endif
}
163

    
164
/* Set the emulated PC to the immediate value 'val'. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
168

    
169
static void gen_exception(int excp)
170
{
171
    TCGv_i32 tmp = tcg_temp_new_i32();
172
    tcg_gen_movi_i32(tmp, excp);
173
    gen_helper_exception(cpu_env, tmp);
174
    tcg_temp_free_i32(tmp);
175
}
176

    
177
/* Raise exception 'excp' as if taken at the instruction starting
 * 'offset' bytes before the current decode position, and mark the
 * translation block as ended by an exception.
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp);
    s->is_jmp = DISAS_EXC;
}
183

    
184
/* Decide whether a branch to 'dest' may use a direct (chained)
 * goto_tb, or must take the slow exit path.
 */
static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep or deterministic io */
    if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
        return false;
    }

    /* Only link tbs from inside the same guest page */
    return (s->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
}
198

    
199
/* Emit a jump to 'dest'. When use_goto_tb() allows it, use a
 * chainable goto_tb exit in slot 'n' of this tb; otherwise update
 * the PC and take a plain exit (raising a debug exception first
 * when single-stepping).
 */
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        /* exit value encodes tb pointer plus the goto_tb slot index */
        tcg_gen_exit_tb((tcg_target_long)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->singlestep_enabled) {
            gen_exception(EXCP_DEBUG);
        }
        tcg_gen_exit_tb(0);
        s->is_jmp = DISAS_JUMP;
    }
}
218

    
219
/* Treat the 4-byte instruction currently being decoded as an
 * unallocated encoding: raise an Undefined Instruction exception.
 */
static void unallocated_encoding(DisasContext *s)
{
    gen_exception_insn(s, 4, EXCP_UDEF);
}
223

    
224
/* Log (LOG_UNIMP) that a valid instruction encoding is not yet
 * implemented, then treat it as unallocated (UNDEF). Note: no
 * semicolon after "while (0)" so the macro behaves like a single
 * statement and is safe inside if/else.
 */
#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)
232

    
233
/* Reset the per-instruction temporary tracking array. Under
 * CONFIG_DEBUG_TCG the slots are also marked unused so stale use
 * of a freed temporary can be caught.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}
243

    
244
/* Free every temporary handed out by new_tmp_a64() for the current
 * instruction, then reset the tracking array.
 */
static void free_tmp_a64(DisasContext *s)
{
    int idx = 0;

    while (idx < s->tmp_a64_count) {
        tcg_temp_free_i64(s->tmp_a64[idx]);
        idx++;
    }
    init_tmp_a64_array(s);
}
252

    
253
/* Allocate a 64 bit temporary that is tracked in the DisasContext
 * and automatically released by free_tmp_a64().
 */
static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    TCGv_i64 tmp;

    assert(s->tmp_a64_count < TMP_A64_MAX);
    tmp = tcg_temp_new_i64();
    s->tmp_a64[s->tmp_a64_count] = tmp;
    s->tmp_a64_count++;
    return tmp;
}
258

    
259
/* As new_tmp_a64(), but the returned temporary is initialized to 0. */
static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}
265

    
266
/*
267
 * Register access functions
268
 *
269
 * These functions are used for directly accessing a register in where
270
 * changes to the final register value are likely to be made. If you
271
 * need to use a register for temporary calculation (e.g. index type
272
 * operations) use the read_* form.
273
 *
274
 * B1.2.1 Register mappings
275
 *
276
 * In instruction register encoding 31 can refer to ZR (zero register) or
277
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
278
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
279
 * This is the point of the _sp forms.
280
 */
281
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
282
{
283
    if (reg == 31) {
284
        return new_tmp_a64_zero(s);
285
    } else {
286
        return cpu_X[reg];
287
    }
288
}
289

    
290
/* register access for when 31 == SP (no ZR aliasing; SP lives in
 * cpu_X[31])
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
295

    
296
/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
297
 * representing the register contents. This TCGv is an auto-freed
298
 * temporary so it need not be explicitly freed, and may be modified.
299
 */
300
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
301
{
302
    TCGv_i64 v = new_tmp_a64(s);
303
    if (reg != 31) {
304
        if (sf) {
305
            tcg_gen_mov_i64(v, cpu_X[reg]);
306
        } else {
307
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
308
        }
309
    } else {
310
        tcg_gen_movi_i64(v, 0);
311
    }
312
    return v;
313
}
314

    
315
/* As read_cpu_reg(), but register 31 is SP rather than ZR. */
static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);

    if (!sf) {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);  /* 32 bit view, zero extended */
    } else {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    }
    return v;
}
325

    
326
/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 * 'size' is log2(bytes per element), so the element stride is 1 << size.
 */
static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
{
    /* vfp.regs[2n] holds the low 64 bits of Qn, vfp.regs[2n+1] the high */
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and  vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    return offs;
}
347

    
348
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* the least significant 1 << size bytes sit at the high end of
     * the 64 bit slot on big endian hosts
     */
    offs += (8 - (1 << size));
#endif
    return offs;
}
361

    
362
/* Offset of the high half (bits 127..64) of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(int regno)
{
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
367

    
368
/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    /* Read Dn (the low 64 bits of Qn) into a fresh temporary. */
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    return v;
}
381

    
382
/* Read Sn (the low 32 bits of Qn) into a fresh i32 temporary;
 * caller must free it.
 */
static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
    return v;
}
389

    
390
/* Write v to Dn and clear the high 64 bits of Qn. */
static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
    tcg_temp_free_i64(tcg_zero);
}
398

    
399
/* Write v to Sn, zero-extending it into the rest of Qn
 * (delegates to write_fp_dreg for the high-half clearing).
 */
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
407

    
408
static TCGv_ptr get_fpstatus_ptr(void)
409
{
410
    TCGv_ptr statusptr = tcg_temp_new_ptr();
411
    int offset;
412

    
413
    /* In A64 all instructions (both FP and Neon) use the FPCR;
414
     * there is no equivalent of the A32 Neon "standard FPSCR value"
415
     * and all operations use vfp.fp_status.
416
     */
417
    offset = offsetof(CPUARMState, vfp.fp_status);
418
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
419
    return statusptr;
420
}
421

    
422
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    TCGv_i64 flag = tcg_temp_new_i64();

    /* cpu_ZF is stored with inverted sense: it is non-zero exactly
     * when the 64 bit result is non-zero (Z flag clear).
     */
    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
    tcg_gen_trunc_i64_i32(cpu_ZF, flag);
    /* copy the top 32 bits so bit 63 of the result lands in bit 31
     * of cpu_NF (the sign bit)
     */
    tcg_gen_shri_i64(flag, result, 32);
    tcg_gen_trunc_i64_i32(cpu_NF, flag);
    tcg_temp_free_i64(flag);
}
435

    
436
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
437
static inline void gen_logic_CC(int sf, TCGv_i64 result)
438
{
439
    if (sf) {
440
        gen_set_NZ64(result);
441
    } else {
442
        tcg_gen_trunc_i64_i32(cpu_ZF, result);
443
        tcg_gen_trunc_i64_i32(cpu_NF, result);
444
    }
445
    tcg_gen_movi_i32(cpu_CF, 0);
446
    tcg_gen_movi_i32(cpu_VF, 0);
447
}
448

    
449
/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* 128 bit add with zero high parts: carry-out lands in 'flag' */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1): overflow when the operands
         * share a sign that the result does not (bit 63 -> cpu_VF bit 31)
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);

        /* write dest last, so it may alias t0 or t1 */
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        /* sum goes straight into cpu_NF, carry-out into cpu_CF */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* the 64 bit dest gets the zero-extended 32 bit result */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
496

    
497
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* C means "no borrow": set when t0 >= t1 unsigned */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1): overflow when operands differ
         * in sign and the result's sign differs from t0 (bit 63)
         */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        /* dest holds the zero-extended 32 bit result */
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
544

    
545
/* dest = T0 + T1 + CF; do not compute flags. */
546
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
547
{
548
    TCGv_i64 flag = tcg_temp_new_i64();
549
    tcg_gen_extu_i32_i64(flag, cpu_CF);
550
    tcg_gen_add_i64(dest, t0, t1);
551
    tcg_gen_add_i64(dest, dest, flag);
552
    tcg_temp_free_i64(flag);
553

    
554
    if (!sf) {
555
        tcg_gen_ext32u_i64(dest, dest);
556
    }
557
}
558

    
559
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* two add2 steps: (t0 + CF) then (+ t1), accumulating the
         * carry-out in cf_64 each time
         */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1), taken from bit 63 */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_shri_i64(vf_64, vf_64, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        /* (t0 + CF) then (+ t1); sum in cpu_NF, carry-out in cpu_CF */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
609

    
610
/*
611
 * Load/Store generators
612
 */
613

    
614
/*
 * Store from GPR register to memory
 */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    /* size is log2(bytes): GPR stores are at most 64 bits */
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
}
623

    
624
/*
 * Load from memory to GPR register.
 * size: log2(bytes), at most 3 (64 bit).
 * is_signed: sign-extend the loaded value.
 * extend: destination is a 32 bit (W) register, so re-zero bits 63..32
 *         after a sign-extending load.
 */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);

    if (extend && is_signed) {
        /* the load sign-extended to 64 bits; clear the top half so the
         * value is sign-extended only within the low 32 bits
         */
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }
}
645

    
646
/*
647
 * Store from FP register to memory
648
 */
649
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
650
{
651
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
652
    TCGv_i64 tmp = tcg_temp_new_i64();
653
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
654
    if (size < 4) {
655
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
656
    } else {
657
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
658
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
659
        tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
660
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
661
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
662
        tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
663
        tcg_temp_free_i64(tcg_hiaddr);
664
    }
665

    
666
    tcg_temp_free_i64(tmp);
667
}
668

    
669
/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        /* up to 64 bits loaded into the low half; high half zeroed */
        TCGMemOp memop = MO_TE + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        /* 128 bit load: low doubleword at tcg_addr, high at +8 */
        TCGv_i64 tcg_hiaddr;
        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
699

    
700
/*
701
 * Vector load/store helpers.
702
 *
703
 * The principal difference between this and a FP load is that we don't
704
 * zero extend as we are filling a partial chunk of the vector register.
705
 * These functions don't support 128 bit loads/stores, which would be
706
 * normal load/store operations.
707
 *
708
 * The _i32 versions are useful when operating on 32 bit quantities
709
 * (eg for floating point single or using Neon helper functions).
710
 */
711

    
712
/* Get value of an element within a vector register, widened into a
 * 64 bit destination. memop gives the element size and, via MO_SIGN,
 * whether to sign- or zero-extend.
 */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        /* full width: sign/zero distinction is moot */
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
744

    
745
/* As read_vec_element(), but into a 32 bit destination; elements
 * wider than 32 bits are not supported.
 */
static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        /* full width: sign/zero distinction is moot */
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
770

    
771
/* Set value of an element within a vector register; only the
 * low (1 << size) bytes of tcg_src are stored, other elements of
 * the register are left untouched.
 */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
793

    
794
/* As write_vec_element(), but from a 32 bit source; 64 bit elements
 * must use the i64 variant.
 */
static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
812

    
813
/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    /* element 1 at MO_64 is bits 127..64 of Qrd */
    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}
823

    
824
/* Store from vector register to memory */
825
static void do_vec_st(DisasContext *s, int srcidx, int element,
826
                      TCGv_i64 tcg_addr, int size)
827
{
828
    TCGMemOp memop = MO_TE + size;
829
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
830

    
831
    read_vec_element(s, tcg_tmp, srcidx, element, size);
832
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
833

    
834
    tcg_temp_free_i64(tcg_tmp);
835
}
836

    
837
/* Load from memory to vector register */
838
static void do_vec_ld(DisasContext *s, int destidx, int element,
839
                      TCGv_i64 tcg_addr, int size)
840
{
841
    TCGMemOp memop = MO_TE + size;
842
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
843

    
844
    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
845
    write_vec_element(s, tcg_tmp, destidx, element, size);
846

    
847
    tcg_temp_free_i64(tcg_tmp);
848
}
849

    
850
/*
851
 * This utility function is for doing register extension with an
852
 * optional shift. You will likely want to pass a temporary for the
853
 * destination register. See DecodeRegExtend() in the ARM ARM.
854
 */
855
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
856
                              int option, unsigned int shift)
857
{
858
    int extsize = extract32(option, 0, 2);
859
    bool is_signed = extract32(option, 2, 1);
860

    
861
    if (is_signed) {
862
        switch (extsize) {
863
        case 0:
864
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
865
            break;
866
        case 1:
867
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
868
            break;
869
        case 2:
870
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
871
            break;
872
        case 3:
873
            tcg_gen_mov_i64(tcg_out, tcg_in);
874
            break;
875
        }
876
    } else {
877
        switch (extsize) {
878
        case 0:
879
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
880
            break;
881
        case 1:
882
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
883
            break;
884
        case 2:
885
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
886
            break;
887
        case 3:
888
            tcg_gen_mov_i64(tcg_out, tcg_in);
889
            break;
890
        }
891
    }
892

    
893
    if (shift) {
894
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
895
    }
896
}
897

    
898
/* Hook for SP alignment checking; currently intentionally empty. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
910

    
911
/*
912
 * This provides a simple table based table lookup decoder. It is
913
 * intended to be used when the relevant bits for decode are too
914
 * awkwardly placed and switch/if based logic would be confusing and
915
 * deeply nested. Since it's a linear search through the table, tables
916
 * should be kept small.
917
 *
918
 * It returns the first handler where insn & mask == pattern, or
919
 * NULL if there is no match.
920
 * The table is terminated by an empty mask (i.e. 0)
921
 */
922
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
923
                                               uint32_t insn)
924
{
925
    const AArch64DecodeTable *tptr = table;
926

    
927
    while (tptr->mask) {
928
        if ((insn & tptr->mask) == tptr->pattern) {
929
            return tptr->disas_fn;
930
        }
931
        tptr++;
932
    }
933
    return NULL;
934
}
935

    
936
/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a).
 */
941

    
942
/* C3.2.7 Unconditional branch (immediate)
943
 *   31  30       26 25                                  0
944
 * +----+-----------+-------------------------------------+
945
 * | op | 0 0 1 0 1 |                 imm26               |
946
 * +----+-----------+-------------------------------------+
947
 */
948
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
949
{
950
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
951

    
952
    if (insn & (1 << 31)) {
953
        /* C5.6.26 BL Branch with link */
954
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
955
    }
956

    
957
    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
958
    gen_goto_tb(s, 0, addr);
959
}
960

    
961
/* C3.2.1 Compare & branch (immediate)
962
 *   31  30         25  24  23                  5 4      0
963
 * +----+-------------+----+---------------------+--------+
964
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
965
 * +----+-------------+----+---------------------+--------+
966
 */
967
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
968
{
969
    unsigned int sf, op, rt;
970
    uint64_t addr;
971
    int label_match;
972
    TCGv_i64 tcg_cmp;
973

    
974
    sf = extract32(insn, 31, 1);
975
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
976
    rt = extract32(insn, 0, 5);
977
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
978

    
979
    tcg_cmp = read_cpu_reg(s, rt, sf);
980
    label_match = gen_new_label();
981

    
982
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
983
                        tcg_cmp, 0, label_match);
984

    
985
    gen_goto_tb(s, 0, s->pc);
986
    gen_set_label(label_match);
987
    gen_goto_tb(s, 1, addr);
988
}
989

    
990
/* C3.2.5 Test & branch (immediate)
991
 *   31  30         25  24  23   19 18          5 4    0
992
 * +----+-------------+----+-------+-------------+------+
993
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
994
 * +----+-------------+----+-------+-------------+------+
995
 */
996
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
997
{
998
    unsigned int bit_pos, op, rt;
999
    uint64_t addr;
1000
    int label_match;
1001
    TCGv_i64 tcg_cmp;
1002

    
1003
    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1004
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1005
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1006
    rt = extract32(insn, 0, 5);
1007

    
1008
    tcg_cmp = tcg_temp_new_i64();
1009
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1010
    label_match = gen_new_label();
1011
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1012
                        tcg_cmp, 0, label_match);
1013
    tcg_temp_free_i64(tcg_cmp);
1014
    gen_goto_tb(s, 0, s->pc);
1015
    gen_set_label(label_match);
1016
    gen_goto_tb(s, 1, addr);
1017
}
1018

    
1019
/* C3.2.2 / C5.6.19 Conditional branch (immediate)
1020
 *  31           25  24  23                  5   4  3    0
1021
 * +---------------+----+---------------------+----+------+
1022
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1023
 * +---------------+----+---------------------+----+------+
1024
 */
1025
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1026
{
1027
    unsigned int cond;
1028
    uint64_t addr;
1029

    
1030
    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1031
        unallocated_encoding(s);
1032
        return;
1033
    }
1034
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1035
    cond = extract32(insn, 0, 4);
1036

    
1037
    if (cond < 0x0e) {
1038
        /* genuinely conditional branches */
1039
        int label_match = gen_new_label();
1040
        arm_gen_test_cc(cond, label_match);
1041
        gen_goto_tb(s, 0, s->pc);
1042
        gen_set_label(label_match);
1043
        gen_goto_tb(s, 1, addr);
1044
    } else {
1045
        /* 0xe and 0xf are both "always" conditions */
1046
        gen_goto_tb(s, 0, addr);
1047
    }
1048
}
1049

    
1050
/* C5.6.68 HINT */
1051
static void handle_hint(DisasContext *s, uint32_t insn,
1052
                        unsigned int op1, unsigned int op2, unsigned int crm)
1053
{
1054
    unsigned int selector = crm << 3 | op2;
1055

    
1056
    if (op1 != 3) {
1057
        unallocated_encoding(s);
1058
        return;
1059
    }
1060

    
1061
    switch (selector) {
1062
    case 0: /* NOP */
1063
        return;
1064
    case 1: /* YIELD */
1065
    case 2: /* WFE */
1066
    case 3: /* WFI */
1067
    case 4: /* SEV */
1068
    case 5: /* SEVL */
1069
        /* we treat all as NOP at least for now */
1070
        return;
1071
    default:
1072
        /* default specified as NOP equivalent */
1073
        return;
1074
    }
1075
}
1076

    
1077
/* CLREX: clear the local exclusive monitor by invalidating the
 * tracked exclusive address (-1 never compares equal to a real
 * load-exclusive address, so a later store-exclusive will fail).
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1081

    
1082
/* CLREX, DSB, DMB, ISB */
1083
static void handle_sync(DisasContext *s, uint32_t insn,
1084
                        unsigned int op1, unsigned int op2, unsigned int crm)
1085
{
1086
    if (op1 != 3) {
1087
        unallocated_encoding(s);
1088
        return;
1089
    }
1090

    
1091
    switch (op2) {
1092
    case 2: /* CLREX */
1093
        gen_clrex(s, insn);
1094
        return;
1095
    case 4: /* DSB */
1096
    case 5: /* DMB */
1097
    case 6: /* ISB */
1098
        /* We don't emulate caches so barriers are no-ops */
1099
        return;
1100
    default:
1101
        unallocated_encoding(s);
1102
        return;
1103
    }
1104
}
1105

    
1106
/* C5.6.130 MSR (immediate) - move immediate to processor state field.
 * Not yet implemented; reported via the "unsupported" path.
 */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsupported_encoding(s, insn);
}
1112

    
1113
/* Assemble the architectural NZCV value (bits 31..28) into tcg_rt
 * from the split flag variables: cpu_NF carries N in bit 31, cpu_ZF
 * is zero iff Z is set, cpu_CF carries C in bit 0, cpu_VF carries V
 * in bit 31 (see gen_set_nzcv for the inverse).
 */
static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
    /* build bit 30, Z: set iff cpu_ZF == 0 */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C: cpu_CF already holds C in bit 0 */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V: V is the top bit of cpu_VF */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result: zero-extend the flags word to 64 bits */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
1134

    
1135
/* Scatter an architectural NZCV value (bits 31..28 of tcg_rt) into
 * the split flag variables; inverse of gen_get_nzcv.
 */
static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t]: only the low 32 bits are relevant */
    tcg_gen_trunc_i64_i32(nzcv, tcg_rt);

    /* bit 31, N: kept in place as the top bit of cpu_NF */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
    /* bit 30, Z: cpu_ZF is "zero means Z set", so invert the sense */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C: shifted down to bit 0 of cpu_CF */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V: shifted up to the top bit of cpu_VF */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
1156

    
1157
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look the register up by its encoded (op0,op1,op2,CRn,CRm) tuple */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register */
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions for the current privilege level */
    if (!cp_access_ok(s->current_pl, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* NZCV pseudo-register: routed via the split flag variables
         * rather than a CPU state field.
         */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    default:
        break;
    }

    /* I/O registers need icount bracketing around the access */
    if (use_icount && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant register: the reset value is the value */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            /* Sync PC so a helper-raised exception reports the
             * address of this instruction.
             */
            gen_a64_set_pc_im(s->pc - 4);
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain state field: load directly from CPU state */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            gen_a64_set_pc_im(s->pc - 4);
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if (use_icount && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1248

    
1249
/* C3.2.4 System
1250
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1251
 * +---------------------+---+-----+-----+-------+-------+-----+------+
1252
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1253
 * +---------------------+---+-----+-----+-------+-------+-----+------+
1254
 */
1255
static void disas_system(DisasContext *s, uint32_t insn)
1256
{
1257
    unsigned int l, op0, op1, crn, crm, op2, rt;
1258
    l = extract32(insn, 21, 1);
1259
    op0 = extract32(insn, 19, 2);
1260
    op1 = extract32(insn, 16, 3);
1261
    crn = extract32(insn, 12, 4);
1262
    crm = extract32(insn, 8, 4);
1263
    op2 = extract32(insn, 5, 3);
1264
    rt = extract32(insn, 0, 5);
1265

    
1266
    if (op0 == 0) {
1267
        if (l || rt != 31) {
1268
            unallocated_encoding(s);
1269
            return;
1270
        }
1271
        switch (crn) {
1272
        case 2: /* C5.6.68 HINT */
1273
            handle_hint(s, insn, op1, op2, crm);
1274
            break;
1275
        case 3: /* CLREX, DSB, DMB, ISB */
1276
            handle_sync(s, insn, op1, op2, crm);
1277
            break;
1278
        case 4: /* C5.6.130 MSR (immediate) */
1279
            handle_msr_i(s, insn, op1, op2, crm);
1280
            break;
1281
        default:
1282
            unallocated_encoding(s);
1283
            break;
1284
        }
1285
        return;
1286
    }
1287
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1288
}
1289

    
1290
/* C3.2.3 Exception generation
1291
 *
1292
 *  31             24 23 21 20                     5 4   2 1  0
1293
 * +-----------------+-----+------------------------+-----+----+
1294
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1295
 * +-----------------------+------------------------+----------+
1296
 */
1297
static void disas_exc(DisasContext *s, uint32_t insn)
1298
{
1299
    int opc = extract32(insn, 21, 3);
1300
    int op2_ll = extract32(insn, 0, 5);
1301

    
1302
    switch (opc) {
1303
    case 0:
1304
        /* SVC, HVC, SMC; since we don't support the Virtualization
1305
         * or TrustZone extensions these all UNDEF except SVC.
1306
         */
1307
        if (op2_ll != 1) {
1308
            unallocated_encoding(s);
1309
            break;
1310
        }
1311
        gen_exception_insn(s, 0, EXCP_SWI);
1312
        break;
1313
    case 1:
1314
        if (op2_ll != 0) {
1315
            unallocated_encoding(s);
1316
            break;
1317
        }
1318
        /* BRK */
1319
        gen_exception_insn(s, 0, EXCP_BKPT);
1320
        break;
1321
    case 2:
1322
        if (op2_ll != 0) {
1323
            unallocated_encoding(s);
1324
            break;
1325
        }
1326
        /* HLT */
1327
        unsupported_encoding(s, insn);
1328
        break;
1329
    case 5:
1330
        if (op2_ll < 1 || op2_ll > 3) {
1331
            unallocated_encoding(s);
1332
            break;
1333
        }
1334
        /* DCPS1, DCPS2, DCPS3 */
1335
        unsupported_encoding(s, insn);
1336
        break;
1337
    default:
1338
        unallocated_encoding(s);
1339
        break;
1340
    }
1341
}
1342

    
1343
/* C3.2.7 Unconditional branch (register)
1344
 *  31           25 24   21 20   16 15   10 9    5 4     0
1345
 * +---------------+-------+-------+-------+------+-------+
1346
 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1347
 * +---------------+-------+-------+-------+------+-------+
1348
 */
1349
static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1350
{
1351
    unsigned int opc, op2, op3, rn, op4;
1352

    
1353
    opc = extract32(insn, 21, 4);
1354
    op2 = extract32(insn, 16, 5);
1355
    op3 = extract32(insn, 10, 6);
1356
    rn = extract32(insn, 5, 5);
1357
    op4 = extract32(insn, 0, 5);
1358

    
1359
    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1360
        unallocated_encoding(s);
1361
        return;
1362
    }
1363

    
1364
    switch (opc) {
1365
    case 0: /* BR */
1366
    case 2: /* RET */
1367
        break;
1368
    case 1: /* BLR */
1369
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1370
        break;
1371
    case 4: /* ERET */
1372
    case 5: /* DRPS */
1373
        if (rn != 0x1f) {
1374
            unallocated_encoding(s);
1375
        } else {
1376
            unsupported_encoding(s, insn);
1377
        }
1378
        return;
1379
    default:
1380
        unallocated_encoding(s);
1381
        return;
1382
    }
1383

    
1384
    tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1385
    s->is_jmp = DISAS_JUMP;
1386
}
1387

    
1388
/* C3.2 Branches, exception generating and system instructions */
1389
static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1390
{
1391
    switch (extract32(insn, 25, 7)) {
1392
    case 0x0a: case 0x0b:
1393
    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1394
        disas_uncond_b_imm(s, insn);
1395
        break;
1396
    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1397
        disas_comp_b_imm(s, insn);
1398
        break;
1399
    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1400
        disas_test_b_imm(s, insn);
1401
        break;
1402
    case 0x2a: /* Conditional branch (immediate) */
1403
        disas_cond_b_imm(s, insn);
1404
        break;
1405
    case 0x6a: /* Exception generation / System */
1406
        if (insn & (1 << 24)) {
1407
            disas_system(s, insn);
1408
        } else {
1409
            disas_exc(s, insn);
1410
        }
1411
        break;
1412
    case 0x6b: /* Unconditional branch (register) */
1413
        disas_uncond_b_reg(s, insn);
1414
        break;
1415
    default:
1416
        unallocated_encoding(s);
1417
        break;
1418
    }
1419
}
1420

    
1421
/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * In system emulation mode only one CPU will be running at once, so
 * this sequence is effectively atomic.  In user emulation mode we
 * throw an exception and handle the atomic operation elsewhere.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    /* target-endian access of 2^size bytes */
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        /* the second element lives 2^size bytes above the first */
        g_assert(size >= 2);
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        /* remember the high value for the store-exclusive comparison */
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    /* remember the value and deliver it to Rt */
    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    /* remember the monitored address last */
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
1460

    
1461
#ifdef CONFIG_USER_ONLY
/* User mode: record the operands and raise EXCP_STREX; the usermode
 * exception handler performs the atomic operation itself.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    /* pack size, pair flag and the three register numbers for the handler */
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_insn(s, 4, EXCP_STREX);
}
#else
/* System mode: not implemented yet; log it and emit nothing */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    qemu_log_mask(LOG_UNIMP,
                  "%s:%d: system mode store_exclusive unsupported "
                  "at pc=%016" PRIx64 "\n",
                  __FILE__, __LINE__, s->pc - 4);
}
#endif
1480

    
1481
/* C3.3.6 Load/store exclusive
1482
 *
1483
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1484
 * +-----+-------------+----+---+----+------+----+-------+------+------+
1485
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1486
 * +-----+-------------+----+---+----+------+----+-------+------+------+
1487
 *
1488
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1489
 *   L: 0 -> store, 1 -> load
1490
 *  o2: 0 -> exclusive, 1 -> not
1491
 *  o1: 0 -> single register, 1 -> register pair
1492
 *  o0: 1 -> load-acquire/store-release, 0 -> not
1493
 *
1494
 *  o0 == 0 AND o2 == 1 is un-allocated
1495
 *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
1496
 */
1497
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1498
{
1499
    int rt = extract32(insn, 0, 5);
1500
    int rn = extract32(insn, 5, 5);
1501
    int rt2 = extract32(insn, 10, 5);
1502
    int is_lasr = extract32(insn, 15, 1);
1503
    int rs = extract32(insn, 16, 5);
1504
    int is_pair = extract32(insn, 21, 1);
1505
    int is_store = !extract32(insn, 22, 1);
1506
    int is_excl = !extract32(insn, 23, 1);
1507
    int size = extract32(insn, 30, 2);
1508
    TCGv_i64 tcg_addr;
1509

    
1510
    if ((!is_excl && !is_lasr) ||
1511
        (is_pair && size < 2)) {
1512
        unallocated_encoding(s);
1513
        return;
1514
    }
1515

    
1516
    if (rn == 31) {
1517
        gen_check_sp_alignment(s);
1518
    }
1519
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
1520

    
1521
    /* Note that since TCG is single threaded load-acquire/store-release
1522
     * semantics require no extra if (is_lasr) { ... } handling.
1523
     */
1524

    
1525
    if (is_excl) {
1526
        if (!is_store) {
1527
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1528
        } else {
1529
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1530
        }
1531
    } else {
1532
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
1533
        if (is_store) {
1534
            do_gpr_st(s, tcg_rt, tcg_addr, size);
1535
        } else {
1536
            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1537
        }
1538
        if (is_pair) {
1539
            TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1540
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1541
            if (is_store) {
1542
                do_gpr_st(s, tcg_rt2, tcg_addr, size);
1543
            } else {
1544
                do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1545
            }
1546
        }
1547
    }
1548
}
1549

    
1550
/*
1551
 * C3.3.5 Load register (literal)
1552
 *
1553
 *  31 30 29   27  26 25 24 23                5 4     0
1554
 * +-----+-------+---+-----+-------------------+-------+
1555
 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1556
 * +-----+-------+---+-----+-------------------+-------+
1557
 *
1558
 * V: 1 -> vector (simd/fp)
1559
 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1560
 *                   10-> 32 bit signed, 11 -> prefetch
1561
 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1562
 */
1563
static void disas_ld_lit(DisasContext *s, uint32_t insn)
1564
{
1565
    int rt = extract32(insn, 0, 5);
1566
    int64_t imm = sextract32(insn, 5, 19) << 2;
1567
    bool is_vector = extract32(insn, 26, 1);
1568
    int opc = extract32(insn, 30, 2);
1569
    bool is_signed = false;
1570
    int size = 2;
1571
    TCGv_i64 tcg_rt, tcg_addr;
1572

    
1573
    if (is_vector) {
1574
        if (opc == 3) {
1575
            unallocated_encoding(s);
1576
            return;
1577
        }
1578
        size = 2 + opc;
1579
    } else {
1580
        if (opc == 3) {
1581
            /* PRFM (literal) : prefetch */
1582
            return;
1583
        }
1584
        size = 2 + extract32(opc, 0, 1);
1585
        is_signed = extract32(opc, 1, 1);
1586
    }
1587

    
1588
    tcg_rt = cpu_reg(s, rt);
1589

    
1590
    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1591
    if (is_vector) {
1592
        do_fp_ld(s, rt, tcg_addr, size);
1593
    } else {
1594
        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1595
    }
1596
    tcg_temp_free_i64(tcg_addr);
1597
}
1598

    
1599
/*
 * C5.6.80 LDNP (Load Pair - non-temporal hint)
 * C5.6.81 LDP (Load Pair - non vector)
 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
 * C5.6.176 STNP (Store Pair - non-temporal hint)
 * C5.6.177 STP (Store Pair - non vector)
 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * C6.3.165 LDP (Load Pair of SIMD&FP)
 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
 * C6.3.284 STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-----------------------------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4 or 8 depending on size)
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int64_t offset = sextract32(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    /* opc == 3 is unallocated for both GPR and SIMD forms */
    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        /* opc<1> selects 32/64 bit; opc<0> set means LDPSW */
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            /* there is no store form of LDPSW */
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    /* imm7 is scaled by the transfer size */
    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* pre-index and signed-offset apply the offset before the access */
    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    /* first register (Rt) at tcg_addr */
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_load) {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        }
    }
    /* second register (Rt2) lives 2^size bytes above the first */
    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
        if (is_load) {
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt2, tcg_addr, size);
        }
    }

    if (wback) {
        /* tcg_addr currently points at the second element, so undo
         * that bump; for post-index additionally apply the offset now.
         */
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}
1739

    
1740
/*
 * C3.3.8 Load/store (immediate post-indexed)
 * C3.3.9 Load/store (immediate pre-indexed)
 * C3.3.12 Load/store (unscaled immediate)
 *
 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 *
 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 * V = 0 -> non-vector
 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);
    bool post_index;
    bool writeback;

    TCGv_i64 tcg_addr;

    if (is_vector) {
        /* vector size is opc<1>:size; 100 -> 128 bit, larger unallocated */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            /* signed 32-bit loads only exist for 8/16 bit sizes... */
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = opc & (1<<1);
        /* opc == 3: sign-extended load into a 32-bit register */
        is_extended = (size < 3) && (opc & 1);
    }

    switch (idx) {
    case 0: /* unscaled immediate: no writeback */
        post_index = false;
        writeback = false;
        break;
    case 1: /* post-indexed */
        post_index = true;
        writeback = true;
        break;
    case 3: /* pre-indexed */
        post_index = false;
        writeback = true;
        break;
    case 2:
        /* NOTE(review): idx == 2 appears to be routed to a different
         * decoder before we get here — confirm the caller guarantees
         * this, since we assert rather than UNDEF.
         */
        g_assert(false);
        break;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* pre-index / unscaled: apply the offset before the access */
    if (!post_index) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
    }

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        /* for post-index the offset is applied only now, after the access */
        if (post_index) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, tcg_addr);
    }
}
1843

    
1844
/*
1845
 * C3.3.10 Load/store (register offset)
1846
 *
1847
 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
1848
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1849
 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
1850
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1851
 *
1852
 * For non-vector:
1853
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
1854
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1855
 * For vector:
1856
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
1857
 *   opc<0>: 0 -> store, 1 -> load
1858
 * V: 1 -> vector/simd
1859
 * opt: extend encoding (see DecodeRegExtend)
1860
 * S: if S=1 then scale (essentially index by sizeof(size))
1861
 * Rt: register to transfer into/out of
1862
 * Rn: address register or SP for base
1863
 * Rm: offset register or ZR for offset
1864
 */
1865
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opc = extract32(insn, 22, 2);
    int opt = extract32(insn, 13, 3);
    int size = extract32(insn, 30, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);

    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_addr;

    /* option<1> == 0 extend encodings are unallocated */
    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        /* Vector element size is opc<1>:size<1:0>; values above 4
         * (more than 128 bits) are unallocated.
         */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Offset register: extended per 'opt' and, when S == 1, scaled by
     * the access size (shift by log2(bytes)).
     */
    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
1933

    
1934
/*
1935
 * C3.3.13 Load/store (unsigned immediate)
1936
 *
1937
 * 31 30 29   27  26 25 24 23 22 21        10 9     5
1938
 * +----+-------+---+-----+-----+------------+-------+------+
1939
 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
1940
 * +----+-------+---+-----+-----+------------+-------+------+
1941
 *
1942
 * For non-vector:
1943
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
1944
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1945
 * For vector:
1946
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
1947
 *   opc<0>: 0 -> store, 1 -> load
1948
 * Rn: base address register (inc SP)
1949
 * Rt: target register
1950
 */
1951
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    bool is_vector = extract32(insn, 26, 1);
    int size = extract32(insn, 30, 2);
    int opc = extract32(insn, 22, 2);
    unsigned int offset;

    TCGv_i64 tcg_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        /* Vector element size is opc<1>:size<1:0>; values above 4
         * (more than 128 bits) are unallocated.
         */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
    /* imm12 is an unsigned offset scaled by the access size */
    offset = imm12 << size;
    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
2010

    
2011
/* Load/store register (immediate forms) */
2012
/* Load/store register (immediate forms) */
static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
{
    int idx = extract32(insn, 10, 2);

    if (idx == 2) {
        /* Load/store register unprivileged */
        unsupported_encoding(s, insn);
    } else {
        /* idx 0, 1, 3:
         * Load/store register (unscaled immediate) and
         * load/store immediate pre/post-indexed
         */
        disas_ldst_reg_imm9(s, insn);
    }
}
2029

    
2030
/* Load/store register (all forms) */
2031
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2032
{
2033
    switch (extract32(insn, 24, 2)) {
2034
    case 0:
2035
        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2036
            disas_ldst_reg_roffset(s, insn);
2037
        } else {
2038
            disas_ldst_reg_imm(s, insn);
2039
        }
2040
        break;
2041
    case 1:
2042
        disas_ldst_reg_unsigned_imm(s, insn);
2043
        break;
2044
    default:
2045
        unallocated_encoding(s);
2046
        break;
2047
    }
2048
}
2049

    
2050
/* C3.3.1 AdvSIMD load/store multiple structures
2051
 *
2052
 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2053
 * +---+---+---------------+---+-------------+--------+------+------+------+
2054
 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2055
 * +---+---+---------------+---+-------------+--------+------+------+------+
2056
 *
2057
 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2058
 *
2059
 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2060
 * +---+---+---------------+---+---+---------+--------+------+------+------+
2061
 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2062
 * +---+---+---------------+---+---+---------+--------+------+------+------+
2063
 *
2064
 * Rt: first (or only) SIMD&FP register to be transferred
2065
 * Rn: base address or SP
2066
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2067
 */
2068
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;                         /* bytes per element */
    int elements = (is_q ? 128 : 64) / (8 << size); /* elements per register */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
    switch (opcode) {
    case 0x0:
        /* LD4/ST4 */
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        /* LD1/ST1, four registers */
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        /* LD3/ST3 */
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        /* LD1/ST1, three registers */
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        /* LD1/ST1, one register */
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        /* LD2/ST2 */
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        /* LD1/ST1, two registers */
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the base address; any post-index writeback is
     * done with a single move at the end.
     */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31 means post-index by the total transfer size,
             * which tcg_addr has already accumulated.
             */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2177

    
2178
/* C3.3.3 AdvSIMD load/store single structure
2179
 *
2180
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2181
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2182
 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2183
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2184
 *
2185
 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2186
 *
2187
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2188
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2189
 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2190
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2191
 *
2192
 * Rt: first (or only) SIMD&FP register to be transferred
2193
 * Rn: base address or SP
2194
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2195
 * index = encoded in Q:S:size dependent on size
2196
 *
2197
 * lane_size = encoded in R, opc
2198
 * transfer width = encoded in opc, S, size
2199
 */
2200
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);  /* log2(lane size); fixed up below */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;  /* structure elements */
    bool replicate = false;            /* true for load-and-replicate (LD*R) */
    int index = is_q << 3 | S << 2 | size;  /* raw Q:S:size lane index */
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

    /* Validate opc/S/size and derive the actual lane size ('scale')
     * and the lane index for that width.
     */
    switch (scale) {
    case 3:
        /* Load and replicate: stores and S != 0 are unallocated */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        /* 8-bit lanes: index is Q:S:size unchanged */
        break;
    case 1:
        /* 16-bit lanes: size<0> must be 0 */
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        /* 32-bit lanes (size == 00), or 64-bit lanes (size == 01, S == 0) */
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    ebytes = 1 << scale;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the base address; writeback happens at the end */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                get_mem_index(s), MO_TE + scale);
            /* Multiplying by a repeating-ones constant copies the
             * loaded element into every lane of the 64-bit value.
             */
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                /* a 64-bit element already fills the value */
                mulconst = 0;
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
            }
        }
        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31 means post-index by the total transfer size */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2323

    
2324
/* C3.3 Loads and stores */
2325
/* C3.3 Loads and stores: top-level dispatch on insn<29:24> */
static void disas_ldst(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 6)) {
    case 0x08:
        /* Load/store exclusive */
        disas_ldst_excl(s, insn);
        break;
    case 0x0c:
        /* AdvSIMD load/store multiple structures */
        disas_ldst_multiple_struct(s, insn);
        break;
    case 0x0d:
        /* AdvSIMD load/store single structure */
        disas_ldst_single_struct(s, insn);
        break;
    case 0x18:
    case 0x1c:
        /* Load register (literal) */
        disas_ld_lit(s, insn);
        break;
    case 0x28:
    case 0x29:
    case 0x2c:
    case 0x2d:
        /* Load/store pair (all forms) */
        disas_ldst_pair(s, insn);
        break;
    case 0x38:
    case 0x39:
    case 0x3c:
    case 0x3d:
        /* Load/store register (all forms) */
        disas_ldst_reg(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
2353

    
2354
/* C3.4.6 PC-rel. addressing
2355
 *   31  30   29 28       24 23                5 4    0
2356
 * +----+-------+-----------+-------------------+------+
2357
 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2358
 * +----+-------+-----------+-------------------+------+
2359
 */
2360
static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
{
    unsigned int rd = extract32(insn, 0, 5);
    bool is_adrp = extract32(insn, 31, 1);
    /* offset = SignExtend(immhi:immlo) */
    int64_t imm = ((int64_t)sextract32(insn, 5, 19) << 2)
                  | extract32(insn, 29, 2);
    /* s->pc has already been advanced; the base is this insn's address */
    uint64_t addr = s->pc - 4;

    if (is_adrp) {
        /* ADRP: page-align the base and scale the offset to pages */
        addr &= ~0xfff;
        imm <<= 12;
    }

    tcg_gen_movi_i64(cpu_reg(s, rd), addr + imm);
}
2380

    
2381
/*
2382
 * C3.4.1 Add/subtract (immediate)
2383
 *
2384
 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2385
 * +--+--+--+-----------+-----+-------------+-----+-----+
2386
 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2387
 * +--+--+--+-----------+-----+-------------+-----+-----+
2388
 *
2389
 *    sf: 0 -> 32bit, 1 -> 64bit
2390
 *    op: 0 -> add  , 1 -> sub
2391
 *     S: 1 -> set flags
2392
 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2393
 */
2394
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t imm = extract32(insn, 10, 12);
    int shift = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool is_64bit = extract32(insn, 31, 1);

    /* Rn may be SP; Rd is SP only for the non-flag-setting forms */
    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
    TCGv_i64 tcg_result;

    /* shift 00: imm unchanged; 01: LSL #12; 1x: reserved */
    switch (shift) {
    case 0x0:
        break;
    case 0x1:
        imm <<= 12;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
        } else {
            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
        }
    } else {
        /* Flag-setting ADDS/SUBS go through the NZCV helpers */
        TCGv_i64 tcg_imm = tcg_const_i64(imm);
        if (sub_op) {
            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        } else {
            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        }
        tcg_temp_free_i64(tcg_imm);
    }

    /* 32-bit results are zero-extended into the 64-bit register */
    if (is_64bit) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
2444

    
2445
/* The input should be a value in the bottom e bits (with higher
2446
 * bits zero); returns that value replicated into every element
2447
 * of size e in a 64 bit integer.
2448
 */
2449
/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Double the populated width until the full 64 bits are covered */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2458

    
2459
/* Return a value with the bottom len bits set (where 0 < len <= 64) */
2460
/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    /* length == 64 is special-cased because 1ULL << 64 is undefined */
    return length == 64 ? ~0ULL : (1ULL << length) - 1;
}
2465

    
2466
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2467
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2468
 * value (ie should cause a guest UNDEF exception), and true if they are
2469
 * valid, in which case the decoded bit pattern is written to result.
2470
 */
2471
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0x11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    if (r) {
        /* Only rotate when r != 0: for r == 0 the unguarded form would
         * compute mask << e, which for e == 64 is a shift by the full
         * width of the type and hence undefined behavior in C. The
         * rotate can also deposit bits above the element boundary, so
         * trim the result back to e bits before replicating.
         */
        mask = (mask >> r) | (mask << (e - r));
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
2528

    
2529
/* C3.4.4 Logical (immediate)
2530
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2531
 * +----+-----+-------------+---+------+------+------+------+
2532
 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2533
 * +----+-----+-------------+---+------+------+------+------+
2534
 */
2535
static void disas_logic_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opc, is_n, immr, imms, rn, rd;
    TCGv_i64 tcg_rd, tcg_rn;
    uint64_t wmask;
    bool is_and = false;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    is_n = extract32(insn, 22, 1);
    immr = extract32(insn, 16, 6);
    imms = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* N == 1 is only valid for the 64-bit variants */
    if (!sf && is_n) {
        unallocated_encoding(s);
        return;
    }

    /* ANDS targets Rd-or-ZR; AND/ORR/EOR target Rd-or-SP */
    if (opc == 0x3) { /* ANDS */
        tcg_rd = cpu_reg(s, rd);
    } else {
        tcg_rd = cpu_reg_sp(s, rd);
    }
    tcg_rn = cpu_reg(s, rn);

    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
        /* some immediate field values are reserved */
        unallocated_encoding(s);
        return;
    }

    if (!sf) {
        wmask &= 0xffffffff;
    }

    switch (opc) {
    case 0x3: /* ANDS */
    case 0x0: /* AND */
        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
        is_and = true;
        break;
    case 0x1: /* ORR */
        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
        break;
    case 0x2: /* EOR */
        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
        break;
    default:
        assert(FALSE); /* must handle all above */
        break;
    }

    if (!sf && !is_and) {
        /* zero extend final result; we know we can skip this for AND
         * since the immediate had the high 32 bits clear.
         */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    if (opc == 3) { /* ANDS */
        gen_logic_CC(sf, tcg_rd);
    }
}
2600

    
2601
/*
2602
 * C3.4.5 Move wide (immediate)
2603
 *
2604
 *  31 30 29 28         23 22 21 20             5 4    0
2605
 * +--+-----+-------------+-----+----------------+------+
2606
 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2607
 * +--+-----+-------------+-----+----------------+------+
2608
 *
2609
 * sf: 0 -> 32 bit, 1 -> 64 bit
2610
 * opc: 00 -> N, 10 -> Z, 11 -> K
2611
 * hw: shift/16 (0,16, and sf only 32, 48)
2612
 */
2613
static void disas_movw_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    uint64_t imm = extract32(insn, 5, 16);
    int sf = extract32(insn, 31, 1);
    int opc = extract32(insn, 29, 2);
    int pos = extract32(insn, 21, 2) << 4;   /* hw field -> bit position */
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_imm;

    /* 32-bit forms only allow hw shifts of 0 or 16 */
    if (!sf && (pos >= 32)) {
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* MOVN */
    case 2: /* MOVZ */
        imm <<= pos;
        if (opc == 0) {
            /* MOVN writes the inverse of the shifted immediate */
            imm = ~imm;
        }
        if (!sf) {
            imm &= 0xffffffffu;
        }
        tcg_gen_movi_i64(tcg_rd, imm);
        break;
    case 3: /* MOVK */
        /* MOVK only replaces the selected 16-bit field of Rd */
        tcg_imm = tcg_const_i64(imm);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
        tcg_temp_free_i64(tcg_imm);
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        break;
    default:
        /* opc == 1 is unallocated */
        unallocated_encoding(s);
        break;
    }
}
2653

    
2654
/* C3.4.2 Bitfield
2655
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2656
 * +----+-----+-------------+---+------+------+------+------+
2657
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2658
 * +----+-----+-------------+---+------+------+------+------+
2659
 */
2660
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);   /* immr: rotate amount */
    si = extract32(insn, 10, 6);   /* imms: source MSB */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* N must equal sf, immr/imms must fit the register width, and
     * opc == 3 is reserved (0: SBFM, 1: BFM, 2: UBFM).
     */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);
    tcg_tmp = read_cpu_reg(s, rn, sf);

    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */

    /* SBFM/UBFM start from a zeroed destination; BFM keeps the old bits */
    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
2711

    
2712
/* C3.4.3 Extract
2713
 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
2714
 * +----+------+-------------+---+----+------+--------+------+------+
2715
 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
2716
 * +----+------+-------------+---+----+------+--------+------+------+
2717
 */
2718
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    /* N must match sf, op21/o0 must be zero, and the extract position
     * must lie within the register width.
     */
    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

        if (imm) {
            /* EXTR: concatenate Rn:Rm and take bitsize bits starting
             * at bit <imm>, built from a right shift of Rm OR'd with a
             * left shift of Rn.
             */
            /* OPTME: we can special case rm==rn as a rotate */
            tcg_rm = read_cpu_reg(s, rm, sf);
            tcg_rn = read_cpu_reg(s, rn, sf);
            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        }

    }
}
2762

    
2763
/* C3.4 Data processing - immediate */
2764
/* C3.4 Data processing - immediate: dispatch on insn<28:23> */
static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
{
    int op = extract32(insn, 23, 6);

    if (op == 0x20 || op == 0x21) {
        /* PC-rel. addressing */
        disas_pc_rel_adr(s, insn);
    } else if (op == 0x22 || op == 0x23) {
        /* Add/subtract (immediate) */
        disas_add_sub_imm(s, insn);
    } else if (op == 0x24) {
        /* Logical (immediate) */
        disas_logic_imm(s, insn);
    } else if (op == 0x25) {
        /* Move wide (immediate) */
        disas_movw_imm(s, insn);
    } else if (op == 0x26) {
        /* Bitfield */
        disas_bitfield(s, insn);
    } else if (op == 0x27) {
        /* Extract */
        disas_extract(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
2790

    
2791
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
 * mandated semantics for out of range shifts.
 */
static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
{
    switch (shift_type) {
    case A64_SHIFT_TYPE_LSL:
        tcg_gen_shl_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_LSR:
        tcg_gen_shr_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_ASR:
        if (!sf) {
            /* 32-bit ASR: sign-extend the low word first so the 64-bit
             * arithmetic shift below reproduces 32-bit semantics.
             */
            tcg_gen_ext32s_i64(dst, src);
        }
        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
        break;
    case A64_SHIFT_TYPE_ROR:
        if (sf) {
            tcg_gen_rotr_i64(dst, src, shift_amount);
        } else {
            /* 32-bit ROR: rotate in 32-bit temps, then zero-extend */
            TCGv_i32 t0, t1;
            t0 = tcg_temp_new_i32();
            t1 = tcg_temp_new_i32();
            tcg_gen_trunc_i64_i32(t0, src);
            tcg_gen_trunc_i64_i32(t1, shift_amount);
            tcg_gen_rotr_i32(t0, t0, t1);
            tcg_gen_extu_i32_i64(dst, t0);
            tcg_temp_free_i32(t0);
            tcg_temp_free_i32(t1);
        }
        break;
    default:
        assert(FALSE); /* all shift types should be handled */
        break;
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(dst, dst);
    }
}
2836

    
2837
/* Shift a TCGv src by immediate, put result in dst.
2838
 * The shift amount must be in range (this should always be true as the
2839
 * relevant instructions will UNDEF on bad shift immediates).
2840
 */
2841
static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
2842
                          enum a64_shift_type shift_type, unsigned int shift_i)
2843
{
2844
    assert(shift_i < (sf ? 64 : 32));
2845

    
2846
    if (shift_i == 0) {
2847
        tcg_gen_mov_i64(dst, src);
2848
    } else {
2849
        TCGv_i64 shift_const;
2850

    
2851
        shift_const = tcg_const_i64(shift_i);
2852
        shift_reg(dst, src, sf, shift_type, shift_const);
2853
        tcg_temp_free_i64(shift_const);
2854
    }
2855
}
2856

    
2857
/* C3.5.10 Logical (shifted register)
 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
 * +----+-----+-----------+-------+---+------+--------+------+------+
 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
 * +----+-----+-----------+-------+---+------+--------+------+------+
 */
static void disas_logic_reg(DisasContext *s, uint32_t insn)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    shift_type = extract32(insn, 22, 2);
    invert = extract32(insn, 21, 1);
    rm = extract32(insn, 16, 5);
    shift_amount = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* 32-bit forms with imm6<5> set are unallocated */
    if (!sf && (shift_amount & (1 << 5))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
         * register-register MOV and MVN, so it is worth special casing.
         */
        tcg_rm = cpu_reg(s, rm);
        if (invert) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
            } else {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
            }
        }
        return;
    }

    /* Apply the optional shift to a (temp copy of) Rm before the op */
    tcg_rm = read_cpu_reg(s, rm, sf);

    if (shift_amount) {
        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
    }

    tcg_rn = cpu_reg(s, rn);

    /* Dispatch on opc:N (the N bit selects the inverted-Rm forms) */
    switch (opc | (invert << 2)) {
    case 0: /* AND */
    case 3: /* ANDS */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 1: /* ORR */
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 2: /* EOR */
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 4: /* BIC */
    case 7: /* BICS */
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 5: /* ORN */
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 6: /* EON */
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    default:
        assert(FALSE);
        break;
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    /* ANDS/BICS set NZCV from the result */
    if (opc == 3) {
        gen_logic_CC(sf, tcg_rd);
    }
}
2946

    
2947
/*
 * C3.5.1 Add/subtract (extended register)
 *
 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 *
 *  sf: 0 -> 32bit, 1 -> 64bit
 *  op: 0 -> add  , 1 -> sub
 *   S: 1 -> set flags
 * opt: 00
 * option: extension type (see DecodeRegExtend)
 * imm3: optional shift to Rm
 *
 * Rd = Rn + LSL(extend(Rm), amount)
 */
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm3 = extract32(insn, 10, 3);
    int option = extract32(insn, 13, 3);
    int rm = extract32(insn, 16, 5);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rm, tcg_rn; /* temps */
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_result;

    /* shift amounts of 5..7 are unallocated */
    if (imm3 > 4) {
        unallocated_encoding(s);
        return;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rn = read_cpu_reg_sp(s, rn, sf);
        tcg_rd = cpu_reg_sp(s, rd);
    } else {
        tcg_rn = read_cpu_reg(s, rn, sf);
        tcg_rd = cpu_reg(s, rd);
    }

    /* extend Rm per 'option', then shift left by imm3 */
    tcg_rm = read_cpu_reg(s, rm, sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    /* write back, zero-extending for the 32-bit form */
    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
3020

    
3021
/*
 * C3.5.2 Add/subtract (shifted register)
 *
 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
 *  imm6: Shift amount to apply to Rm before the add/sub
 */
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 10, 6);
    int rm = extract32(insn, 16, 5);
    int shift_type = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn, tcg_rm;
    TCGv_i64 tcg_result;

    /* ROR shift type and 32-bit shifts >= 32 are unallocated */
    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, sf);
    tcg_rm = read_cpu_reg(s, rm, sf);

    /* pre-shift the (temp copy of) Rm operand */
    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    /* write back, zero-extending for the 32-bit form */
    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
3084

    
3085
/* C3.5.9 Data-processing (3 source)

   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
  +--+------+-----------+------+------+----+------+------+------+
  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
  +--+------+-----------+------+------+----+------+------+------+

 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);
    bool is_high = extract32(op_id, 2, 1);
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        /* SMULH/UMULH: Rd gets the high 64 bits of the 128-bit product */
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    if (op_id < 0x42) {
        /* MADD/MSUB: operands used at full register width */
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        /* widening multiplies: extend the 32-bit source operands */
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}
3183

    
3184
/* C3.5.3 - Add/subtract (with carry)
 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
 * +--+--+--+------------------------+------+---------+------+-----+
 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
 * +--+--+--+------------------------+------+---------+------+-----+
 *                                            [000000]
 */

static void disas_adc_sbc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, setflags, rm, rn, rd;
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    /* opcode2 must be all-zeroes */
    if (extract32(insn, 10, 6) != 0) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    setflags = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    /* SBC is implemented as ADC with the second operand inverted */
    if (op) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
    } else {
        tcg_y = cpu_reg(s, rm);
    }

    if (setflags) {
        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
    }
}
3225

    
3226
/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    int label_continue = -1;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;

    /* S must be 1 */
    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* o2 and o3 must both be 0 */
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    if (cond < 0x0e) { /* not always */
        /* If the condition fails, NZCV is simply set to the immediate
         * nzcv field and we branch past the compare.
         */
        int label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_tmp = tcg_temp_new_i64();
        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
        gen_set_nzcv(tcg_tmp);
        tcg_temp_free_i64(tcg_tmp);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }
    /* match, or condition is always */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* perform the compare purely for its flag-setting side effect */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    if (cond < 0x0e) { /* continue */
        gen_set_label(label_continue);
    }
}
3288

    
3289
/* C3.5.6 Conditional select
 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
 * +----+----+---+-----------------+------+------+-----+------+------+
 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
 * +----+----+---+-----------------+------+------+-----+------+------+
 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, tcg_src;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);     /* CSINV/CSNEG: invert else-value */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);     /* CSINC/CSNEG: increment else-value */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (rd == 31) {
        /* silly no-op write; until we use movcond we must special-case
         * this to avoid a dead temporary across basic blocks.
         */
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (cond >= 0x0e) { /* condition "always" */
        tcg_src = read_cpu_reg(s, rn, sf);
        tcg_gen_mov_i64(tcg_rd, tcg_src);
    } else {
        /* OPTME: we could use movcond here, at the cost of duplicating
         * a lot of the arm_gen_test_cc() logic.
         */
        int label_match = gen_new_label();
        int label_continue = gen_new_label();

        arm_gen_test_cc(cond, label_match);
        /* nomatch: Rd = transformed Rm (inverted/negated/incremented) */
        tcg_src = cpu_reg(s, rm);

        if (else_inv && else_inc) {
            /* NOT(x) + 1 == -x */
            tcg_gen_neg_i64(tcg_rd, tcg_src);
        } else if (else_inv) {
            tcg_gen_not_i64(tcg_rd, tcg_src);
        } else if (else_inc) {
            tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
        } else {
            tcg_gen_mov_i64(tcg_rd, tcg_src);
        }
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        tcg_gen_br(label_continue);
        /* match: Rd = Rn */
        gen_set_label(label_match);
        tcg_src = read_cpu_reg(s, rn, sf);
        tcg_gen_mov_i64(tcg_rd, tcg_src);
        /* continue: */
        gen_set_label(label_continue);
    }
}
3357

    
3358
/* C5.6.40 CLZ: count leading zero bits */
static void handle_clz(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = cpu_reg(s, rn);

    if (!sf) {
        /* 32-bit: count in a 32-bit temp, then zero-extend into Rd */
        TCGv_i32 t32 = tcg_temp_new_i32();

        tcg_gen_trunc_i64_i32(t32, tcg_rn);
        gen_helper_clz(t32, t32);
        tcg_gen_extu_i32_i64(tcg_rd, t32);
        tcg_temp_free_i32(t32);
    } else {
        gen_helper_clz64(tcg_rd, tcg_rn);
    }
}
3375

    
3376
/* C5.6.39 CLS: count leading sign bits */
static void handle_cls(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = cpu_reg(s, rn);

    if (!sf) {
        /* 32-bit: count in a 32-bit temp, then zero-extend into Rd */
        TCGv_i32 t32 = tcg_temp_new_i32();

        tcg_gen_trunc_i64_i32(t32, tcg_rn);
        gen_helper_cls32(t32, t32);
        tcg_gen_extu_i32_i64(tcg_rd, t32);
        tcg_temp_free_i32(t32);
    } else {
        gen_helper_cls64(tcg_rd, tcg_rn);
    }
}
3393

    
3394
/* C5.6.147 RBIT: reverse the bit order of the source register */
static void handle_rbit(DisasContext *s, unsigned int sf,
                        unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = cpu_reg(s, rn);

    if (!sf) {
        /* 32-bit: reverse in a 32-bit temp, then zero-extend into Rd */
        TCGv_i32 t32 = tcg_temp_new_i32();

        tcg_gen_trunc_i64_i32(t32, tcg_rn);
        gen_helper_rbit(t32, t32);
        tcg_gen_extu_i32_i64(tcg_rd, t32);
        tcg_temp_free_i32(t32);
    } else {
        gen_helper_rbit64(tcg_rd, tcg_rn);
    }
}
3411

    
3412
/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3413
static void handle_rev64(DisasContext *s, unsigned int sf,
3414
                         unsigned int rn, unsigned int rd)
3415
{
3416
    if (!sf) {
3417
        unallocated_encoding(s);
3418
        return;
3419
    }
3420
    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3421
}
3422

    
3423
/* C5.6.149 REV with sf==0, opcode==2
 * C5.6.151 REV32 (sf==1, opcode==2)
 * Byte-reverses each 32-bit word of the source register.
 */
static void handle_rev32(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);

    if (sf) {
        /* 64-bit: swap bytes within each 32-bit half independently */
        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

        /* bswap32_i64 requires zero high word */
        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);

        tcg_temp_free_i64(tcg_tmp);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
    }
}
3448

    
3449
/* C5.6.150 REV16 (opcode==1)
 * Byte-reverses each 16-bit halfword of the source register, built up
 * by bswap16-ing each halfword and depositing it back into place.
 */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    /* halfword 0 */
    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);

    /* halfword 1 */
    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);

    if (sf) {
        /* halfwords 2 and 3 for the 64-bit form */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);

        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
    }

    tcg_temp_free_i64(tcg_tmp);
}
3478

    
3479
/* C3.5.7 Data-processing (1 source)
3480
 *   31  30  29  28             21 20     16 15    10 9    5 4    0
3481
 * +----+---+---+-----------------+---------+--------+------+------+
3482
 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3483
 * +----+---+---+-----------------+---------+--------+------+------+
3484
 */
3485
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3486
{
3487
    unsigned int sf, opcode, rn, rd;
3488

    
3489
    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3490
        unallocated_encoding(s);
3491
        return;
3492
    }
3493

    
3494
    sf = extract32(insn, 31, 1);
3495
    opcode = extract32(insn, 10, 6);
3496
    rn = extract32(insn, 5, 5);
3497
    rd = extract32(insn, 0, 5);
3498

    
3499
    switch (opcode) {
3500
    case 0: /* RBIT */
3501
        handle_rbit(s, sf, rn, rd);
3502
        break;
3503
    case 1: /* REV16 */
3504
        handle_rev16(s, sf, rn, rd);
3505
        break;
3506
    case 2: /* REV32 */
3507
        handle_rev32(s, sf, rn, rd);
3508
        break;
3509
    case 3: /* REV64 */
3510
        handle_rev64(s, sf, rn, rd);
3511
        break;
3512
    case 4: /* CLZ */
3513
        handle_clz(s, sf, rn, rd);
3514
        break;
3515
    case 5: /* CLS */
3516
        handle_cls(s, sf, rn, rd);
3517
        break;
3518
    }
3519
}
3520

    
3521
/* C5.6.159 SDIV / C5.6.193 UDIV: shared implementation */
static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
                       unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_n, tcg_m, tcg_rd;
    tcg_rd = cpu_reg(s, rd);

    if (!sf && is_signed) {
        /* 32-bit SDIV: sign-extend the operands so the 64-bit signed
         * division helper yields the 32-bit result.
         */
        tcg_n = new_tmp_a64(s);
        tcg_m = new_tmp_a64(s);
        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
    } else {
        tcg_n = read_cpu_reg(s, rn, sf);
        tcg_m = read_cpu_reg(s, rm, sf);
    }

    if (is_signed) {
        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
    } else {
        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3547

    
3548
/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3549
static void handle_shift_reg(DisasContext *s,
3550
                             enum a64_shift_type shift_type, unsigned int sf,
3551
                             unsigned int rm, unsigned int rn, unsigned int rd)
3552
{
3553
    TCGv_i64 tcg_shift = tcg_temp_new_i64();
3554
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3555
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3556

    
3557
    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3558
    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3559
    tcg_temp_free_i64(tcg_shift);
3560
}
3561

    
3562
/* C3.5.8 Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd;
    sf = extract32(insn, 31, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* S must be 0 */
    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
        /* CRC32/CRC32C family not implemented yet */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
3616

    
3617
/* C3.5 Data processing - register
 * Dispatches on insn[28:24] (and sub-fields) to the per-class decoders.
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 5)) {
    case 0x0a: /* Logical (shifted register) */
        disas_logic_reg(s, insn);
        break;
    case 0x0b: /* Add/subtract */
        if (insn & (1 << 21)) { /* (extended register) */
            disas_add_sub_ext_reg(s, insn);
        } else {
            disas_add_sub_reg(s, insn);
        }
        break;
    case 0x1b: /* Data-processing (3 source) */
        disas_data_proc_3src(s, insn);
        break;
    case 0x1a:
        /* further decode on insn[23:21] */
        switch (extract32(insn, 21, 3)) {
        case 0x0: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;
        case 0x2: /* Conditional compare */
            disas_cc(s, insn); /* both imm and reg forms */
            break;
        case 0x4: /* Conditional select */
            disas_cond_select(s, insn);
            break;
        case 0x6: /* Data-processing */
            if (insn & (1 << 30)) { /* (1 source) */
                disas_data_proc_1src(s, insn);
            } else {            /* (2 source) */
                disas_data_proc_2src(s, insn);
            }
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
3662

    
3663
/* Shared FCMP/FCMPE implementation: compare Vn against Vm (or +0.0 when
 * cmp_with_zero), then write the resulting flag value into NZCV.
 * signal_all_nans selects the signaling compare helpers (FCMPE).
 */
static void handle_fp_compare(DisasContext *s, bool is_double,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn, tcg_vm;

        tcg_vn = read_fp_sreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i32(0);
        } else {
            tcg_vm = read_fp_sreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}
3710

    
3711
/* C3.6.22 Floating point compare
 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 */
static void disas_fp_compare(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, op, rn, opc, op2r;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    op = extract32(insn, 14, 2);
    rn = extract32(insn, 5, 5);
    opc = extract32(insn, 3, 2);
    op2r = extract32(insn, 0, 3);

    /* M:0:S, op and op2 must be zero; only single/double supported */
    if (mos || op || op2r || type > 1) {
        unallocated_encoding(s);
        return;
    }

    /* opc<0>: compare against zero instead of Rm;
     * opc<1>: signaling compare (FCMPE)
     */
    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
}
3736

    
3737
/* C3.6.23 Floating point conditional compare
3738
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
3739
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3740
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
3741
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3742
 */
3743
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
3744
{
3745
    unsigned int mos, type, rm, cond, rn, op, nzcv;
3746
    TCGv_i64 tcg_flags;
3747
    int label_continue = -1;
3748

    
3749
    mos = extract32(insn, 29, 3);
3750
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3751
    rm = extract32(insn, 16, 5);
3752
    cond = extract32(insn, 12, 4);
3753
    rn = extract32(insn, 5, 5);
3754
    op = extract32(insn, 4, 1);
3755
    nzcv = extract32(insn, 0, 4);
3756

    
3757
    if (mos || type > 1) {
3758
        unallocated_encoding(s);
3759
        return;
3760
    }
3761

    
3762
    if (cond < 0x0e) { /* not always */
3763
        int label_match = gen_new_label();
3764
        label_continue = gen_new_label();
3765
        arm_gen_test_cc(cond, label_match);
3766
        /* nomatch: */
3767
        tcg_flags = tcg_const_i64(nzcv << 28);
3768
        gen_set_nzcv(tcg_flags);
3769
        tcg_temp_free_i64(tcg_flags);
3770
        tcg_gen_br(label_continue);
3771
        gen_set_label(label_match);
3772
    }
3773

    
3774
    handle_fp_compare(s, type, rn, rm, false, op);
3775

    
3776
    if (cond < 0x0e) {
3777
        gen_set_label(label_continue);
3778
    }
3779
}
3780

    
3781
/* copy src FP register to dst FP register; type specifies single or double */
3782
static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
3783
{
3784
    if (type) {
3785
        TCGv_i64 v = read_fp_dreg(s, src);
3786
        write_fp_dreg(s, dst, v);
3787
        tcg_temp_free_i64(v);
3788
    } else {
3789
        TCGv_i32 v = read_fp_sreg(s, src);
3790
        write_fp_sreg(s, dst, v);
3791
        tcg_temp_free_i32(v);
3792
    }
3793
}
3794

    
3795
/* C3.6.24 Floating point conditional select
3796
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
3797
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3798
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
3799
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3800
 */
3801
static void disas_fp_csel(DisasContext *s, uint32_t insn)
3802
{
3803
    unsigned int mos, type, rm, cond, rn, rd;
3804
    int label_continue = -1;
3805

    
3806
    mos = extract32(insn, 29, 3);
3807
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3808
    rm = extract32(insn, 16, 5);
3809
    cond = extract32(insn, 12, 4);
3810
    rn = extract32(insn, 5, 5);
3811
    rd = extract32(insn, 0, 5);
3812

    
3813
    if (mos || type > 1) {
3814
        unallocated_encoding(s);
3815
        return;
3816
    }
3817

    
3818
    if (cond < 0x0e) { /* not always */
3819
        int label_match = gen_new_label();
3820
        label_continue = gen_new_label();
3821
        arm_gen_test_cc(cond, label_match);
3822
        /* nomatch: */
3823
        gen_mov_fp2fp(s, type, rd, rm);
3824
        tcg_gen_br(label_continue);
3825
        gen_set_label(label_match);
3826
    }
3827

    
3828
    gen_mov_fp2fp(s, type, rd, rn);
3829

    
3830
    if (cond < 0x0e) { /* continue */
3831
        gen_set_label(label_continue);
3832
    }
3833
}
3834

    
3835
/* C3.6.25 Floating-point data-processing (1 source) - single precision */
3836
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
3837
{
3838
    TCGv_ptr fpst;
3839
    TCGv_i32 tcg_op;
3840
    TCGv_i32 tcg_res;
3841

    
3842
    fpst = get_fpstatus_ptr();
3843
    tcg_op = read_fp_sreg(s, rn);
3844
    tcg_res = tcg_temp_new_i32();
3845

    
3846
    switch (opcode) {
3847
    case 0x0: /* FMOV */
3848
        tcg_gen_mov_i32(tcg_res, tcg_op);
3849
        break;
3850
    case 0x1: /* FABS */
3851
        gen_helper_vfp_abss(tcg_res, tcg_op);
3852
        break;
3853
    case 0x2: /* FNEG */
3854
        gen_helper_vfp_negs(tcg_res, tcg_op);
3855
        break;
3856
    case 0x3: /* FSQRT */
3857
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
3858
        break;
3859
    case 0x8: /* FRINTN */
3860
    case 0x9: /* FRINTP */
3861
    case 0xa: /* FRINTM */
3862
    case 0xb: /* FRINTZ */
3863
    case 0xc: /* FRINTA */
3864
    {
3865
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3866

    
3867
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3868
        gen_helper_rints(tcg_res, tcg_op, fpst);
3869

    
3870
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3871
        tcg_temp_free_i32(tcg_rmode);
3872
        break;
3873
    }
3874
    case 0xe: /* FRINTX */
3875
        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
3876
        break;
3877
    case 0xf: /* FRINTI */
3878
        gen_helper_rints(tcg_res, tcg_op, fpst);
3879
        break;
3880
    default:
3881
        abort();
3882
    }
3883

    
3884
    write_fp_sreg(s, rd, tcg_res);
3885

    
3886
    tcg_temp_free_ptr(fpst);
3887
    tcg_temp_free_i32(tcg_op);
3888
    tcg_temp_free_i32(tcg_res);
3889
}
3890

    
3891
/* C3.6.25 Floating-point data-processing (1 source) - double precision */
3892
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
3893
{
3894
    TCGv_ptr fpst;
3895
    TCGv_i64 tcg_op;
3896
    TCGv_i64 tcg_res;
3897

    
3898
    fpst = get_fpstatus_ptr();
3899
    tcg_op = read_fp_dreg(s, rn);
3900
    tcg_res = tcg_temp_new_i64();
3901

    
3902
    switch (opcode) {
3903
    case 0x0: /* FMOV */
3904
        tcg_gen_mov_i64(tcg_res, tcg_op);
3905
        break;
3906
    case 0x1: /* FABS */
3907
        gen_helper_vfp_absd(tcg_res, tcg_op);
3908
        break;
3909
    case 0x2: /* FNEG */
3910
        gen_helper_vfp_negd(tcg_res, tcg_op);
3911
        break;
3912
    case 0x3: /* FSQRT */
3913
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
3914
        break;
3915
    case 0x8: /* FRINTN */
3916
    case 0x9: /* FRINTP */
3917
    case 0xa: /* FRINTM */
3918
    case 0xb: /* FRINTZ */
3919
    case 0xc: /* FRINTA */
3920
    {
3921
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3922

    
3923
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3924
        gen_helper_rintd(tcg_res, tcg_op, fpst);
3925

    
3926
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3927
        tcg_temp_free_i32(tcg_rmode);
3928
        break;
3929
    }
3930
    case 0xe: /* FRINTX */
3931
        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
3932
        break;
3933
    case 0xf: /* FRINTI */
3934
        gen_helper_rintd(tcg_res, tcg_op, fpst);
3935
        break;
3936
    default:
3937
        abort();
3938
    }
3939

    
3940
    write_fp_dreg(s, rd, tcg_res);
3941

    
3942
    tcg_temp_free_ptr(fpst);
3943
    tcg_temp_free_i64(tcg_op);
3944
    tcg_temp_free_i64(tcg_res);
3945
}
3946

    
3947
static void handle_fp_fcvt(DisasContext *s, int opcode,
3948
                           int rd, int rn, int dtype, int ntype)
3949
{
3950
    switch (ntype) {
3951
    case 0x0:
3952
    {
3953
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
3954
        if (dtype == 1) {
3955
            /* Single to double */
3956
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
3957
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
3958
            write_fp_dreg(s, rd, tcg_rd);
3959
            tcg_temp_free_i64(tcg_rd);
3960
        } else {
3961
            /* Single to half */
3962
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
3963
            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
3964
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3965
            write_fp_sreg(s, rd, tcg_rd);
3966
            tcg_temp_free_i32(tcg_rd);
3967
        }
3968
        tcg_temp_free_i32(tcg_rn);
3969
        break;
3970
    }
3971
    case 0x1:
3972
    {
3973
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
3974
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
3975
        if (dtype == 0) {
3976
            /* Double to single */
3977
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
3978
        } else {
3979
            /* Double to half */
3980
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
3981
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3982
        }
3983
        write_fp_sreg(s, rd, tcg_rd);
3984
        tcg_temp_free_i32(tcg_rd);
3985
        tcg_temp_free_i64(tcg_rn);
3986
        break;
3987
    }
3988
    case 0x3:
3989
    {
3990
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
3991
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
3992
        if (dtype == 0) {
3993
            /* Half to single */
3994
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
3995
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
3996
            write_fp_sreg(s, rd, tcg_rd);
3997
            tcg_temp_free_i32(tcg_rd);
3998
        } else {
3999
            /* Half to double */
4000
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4001
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4002
            write_fp_dreg(s, rd, tcg_rd);
4003
            tcg_temp_free_i64(tcg_rd);
4004
        }
4005
        tcg_temp_free_i32(tcg_rn);
4006
        break;
4007
    }
4008
    default:
4009
        abort();
4010
    }
4011
}
4012

    
4013
/* C3.6.25 Floating point data-processing (1 source)
4014
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4015
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4016
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4017
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4018
 */
4019
static void disas_fp_1src(DisasContext *s, uint32_t insn)
4020
{
4021
    int type = extract32(insn, 22, 2);
4022
    int opcode = extract32(insn, 15, 6);
4023
    int rn = extract32(insn, 5, 5);
4024
    int rd = extract32(insn, 0, 5);
4025

    
4026
    switch (opcode) {
4027
    case 0x4: case 0x5: case 0x7:
4028
    {
4029
        /* FCVT between half, single and double precision */
4030
        int dtype = extract32(opcode, 0, 2);
4031
        if (type == 2 || dtype == type) {
4032
            unallocated_encoding(s);
4033
            return;
4034
        }
4035
        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4036
        break;
4037
    }
4038
    case 0x0 ... 0x3:
4039
    case 0x8 ... 0xc:
4040
    case 0xe ... 0xf:
4041
        /* 32-to-32 and 64-to-64 ops */
4042
        switch (type) {
4043
        case 0:
4044
            handle_fp_1src_single(s, opcode, rd, rn);
4045
            break;
4046
        case 1:
4047
            handle_fp_1src_double(s, opcode, rd, rn);
4048
            break;
4049
        default:
4050
            unallocated_encoding(s);
4051
        }
4052
        break;
4053
    default:
4054
        unallocated_encoding(s);
4055
        break;
4056
    }
4057
}
4058

    
4059
/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4060
static void handle_fp_2src_single(DisasContext *s, int opcode,
4061
                                  int rd, int rn, int rm)
4062
{
4063
    TCGv_i32 tcg_op1;
4064
    TCGv_i32 tcg_op2;
4065
    TCGv_i32 tcg_res;
4066
    TCGv_ptr fpst;
4067

    
4068
    tcg_res = tcg_temp_new_i32();
4069
    fpst = get_fpstatus_ptr();
4070
    tcg_op1 = read_fp_sreg(s, rn);
4071
    tcg_op2 = read_fp_sreg(s, rm);
4072

    
4073
    switch (opcode) {
4074
    case 0x0: /* FMUL */
4075
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4076
        break;
4077
    case 0x1: /* FDIV */
4078
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4079
        break;
4080
    case 0x2: /* FADD */
4081
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4082
        break;
4083
    case 0x3: /* FSUB */
4084
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4085
        break;
4086
    case 0x4: /* FMAX */
4087
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4088
        break;
4089
    case 0x5: /* FMIN */
4090
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4091
        break;
4092
    case 0x6: /* FMAXNM */
4093
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4094
        break;
4095
    case 0x7: /* FMINNM */
4096
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4097
        break;
4098
    case 0x8: /* FNMUL */
4099
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4100
        gen_helper_vfp_negs(tcg_res, tcg_res);
4101
        break;
4102
    }
4103

    
4104
    write_fp_sreg(s, rd, tcg_res);
4105

    
4106
    tcg_temp_free_ptr(fpst);
4107
    tcg_temp_free_i32(tcg_op1);
4108
    tcg_temp_free_i32(tcg_op2);
4109
    tcg_temp_free_i32(tcg_res);
4110
}
4111

    
4112
/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4113
static void handle_fp_2src_double(DisasContext *s, int opcode,
4114
                                  int rd, int rn, int rm)
4115
{
4116
    TCGv_i64 tcg_op1;
4117
    TCGv_i64 tcg_op2;
4118
    TCGv_i64 tcg_res;
4119
    TCGv_ptr fpst;
4120

    
4121
    tcg_res = tcg_temp_new_i64();
4122
    fpst = get_fpstatus_ptr();
4123
    tcg_op1 = read_fp_dreg(s, rn);
4124
    tcg_op2 = read_fp_dreg(s, rm);
4125

    
4126
    switch (opcode) {
4127
    case 0x0: /* FMUL */
4128
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4129
        break;
4130
    case 0x1: /* FDIV */
4131
        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4132
        break;
4133
    case 0x2: /* FADD */
4134
        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4135
        break;
4136
    case 0x3: /* FSUB */
4137
        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4138
        break;
4139
    case 0x4: /* FMAX */
4140
        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4141
        break;
4142
    case 0x5: /* FMIN */
4143
        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4144
        break;
4145
    case 0x6: /* FMAXNM */
4146
        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4147
        break;
4148
    case 0x7: /* FMINNM */
4149
        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4150
        break;
4151
    case 0x8: /* FNMUL */
4152
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4153
        gen_helper_vfp_negd(tcg_res, tcg_res);
4154
        break;
4155
    }
4156

    
4157
    write_fp_dreg(s, rd, tcg_res);
4158

    
4159
    tcg_temp_free_ptr(fpst);
4160
    tcg_temp_free_i64(tcg_op1);
4161
    tcg_temp_free_i64(tcg_op2);
4162
    tcg_temp_free_i64(tcg_res);
4163
}
4164

    
4165
/* C3.6.26 Floating point data-processing (2 source)
4166
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4167
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4168
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4169
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4170
 */
4171
static void disas_fp_2src(DisasContext *s, uint32_t insn)
4172
{
4173
    int type = extract32(insn, 22, 2);
4174
    int rd = extract32(insn, 0, 5);
4175
    int rn = extract32(insn, 5, 5);
4176
    int rm = extract32(insn, 16, 5);
4177
    int opcode = extract32(insn, 12, 4);
4178

    
4179
    if (opcode > 8) {
4180
        unallocated_encoding(s);
4181
        return;
4182
    }
4183

    
4184
    switch (type) {
4185
    case 0:
4186
        handle_fp_2src_single(s, opcode, rd, rn, rm);
4187
        break;
4188
    case 1:
4189
        handle_fp_2src_double(s, opcode, rd, rn, rm);
4190
        break;
4191
    default:
4192
        unallocated_encoding(s);
4193
    }
4194
}
4195

    
4196
/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4197
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4198
                                  int rd, int rn, int rm, int ra)
4199
{
4200
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4201
    TCGv_i32 tcg_res = tcg_temp_new_i32();
4202
    TCGv_ptr fpst = get_fpstatus_ptr();
4203

    
4204
    tcg_op1 = read_fp_sreg(s, rn);
4205
    tcg_op2 = read_fp_sreg(s, rm);
4206
    tcg_op3 = read_fp_sreg(s, ra);
4207

    
4208
    /* These are fused multiply-add, and must be done as one
4209
     * floating point operation with no rounding between the
4210
     * multiplication and addition steps.
4211
     * NB that doing the negations here as separate steps is
4212
     * correct : an input NaN should come out with its sign bit
4213
     * flipped if it is a negated-input.
4214
     */
4215
    if (o1 == true) {
4216
        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4217
    }
4218

    
4219
    if (o0 != o1) {
4220
        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4221
    }
4222

    
4223
    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4224

    
4225
    write_fp_sreg(s, rd, tcg_res);
4226

    
4227
    tcg_temp_free_ptr(fpst);
4228
    tcg_temp_free_i32(tcg_op1);
4229
    tcg_temp_free_i32(tcg_op2);
4230
    tcg_temp_free_i32(tcg_op3);
4231
    tcg_temp_free_i32(tcg_res);
4232
}
4233

    
4234
/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4235
static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4236
                                  int rd, int rn, int rm, int ra)
4237
{
4238
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4239
    TCGv_i64 tcg_res = tcg_temp_new_i64();
4240
    TCGv_ptr fpst = get_fpstatus_ptr();
4241

    
4242
    tcg_op1 = read_fp_dreg(s, rn);
4243
    tcg_op2 = read_fp_dreg(s, rm);
4244
    tcg_op3 = read_fp_dreg(s, ra);
4245

    
4246
    /* These are fused multiply-add, and must be done as one
4247
     * floating point operation with no rounding between the
4248
     * multiplication and addition steps.
4249
     * NB that doing the negations here as separate steps is
4250
     * correct : an input NaN should come out with its sign bit
4251
     * flipped if it is a negated-input.
4252
     */
4253
    if (o1 == true) {
4254
        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4255
    }
4256

    
4257
    if (o0 != o1) {
4258
        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4259
    }
4260

    
4261
    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4262

    
4263
    write_fp_dreg(s, rd, tcg_res);
4264

    
4265
    tcg_temp_free_ptr(fpst);
4266
    tcg_temp_free_i64(tcg_op1);
4267
    tcg_temp_free_i64(tcg_op2);
4268
    tcg_temp_free_i64(tcg_op3);
4269
    tcg_temp_free_i64(tcg_res);
4270
}
4271

    
4272
/* C3.6.27 Floating point data-processing (3 source)
4273
 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4274
 * +---+---+---+-----------+------+----+------+----+------+------+------+
4275
 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4276
 * +---+---+---+-----------+------+----+------+----+------+------+------+
4277
 */
4278
static void disas_fp_3src(DisasContext *s, uint32_t insn)
4279
{
4280
    int type = extract32(insn, 22, 2);
4281
    int rd = extract32(insn, 0, 5);
4282
    int rn = extract32(insn, 5, 5);
4283
    int ra = extract32(insn, 10, 5);
4284
    int rm = extract32(insn, 16, 5);
4285
    bool o0 = extract32(insn, 15, 1);
4286
    bool o1 = extract32(insn, 21, 1);
4287

    
4288
    switch (type) {
4289
    case 0:
4290
        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4291
        break;
4292
    case 1:
4293
        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4294
        break;
4295
    default:
4296
        unallocated_encoding(s);
4297
    }
4298
}
4299

    
4300
/* C3.6.28 Floating point immediate
4301
 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4302
 * +---+---+---+-----------+------+---+------------+-------+------+------+
4303
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4304
 * +---+---+---+-----------+------+---+------------+-------+------+------+
4305
 */
4306
static void disas_fp_imm(DisasContext *s, uint32_t insn)
4307
{
4308
    int rd = extract32(insn, 0, 5);
4309
    int imm8 = extract32(insn, 13, 8);
4310
    int is_double = extract32(insn, 22, 2);
4311
    uint64_t imm;
4312
    TCGv_i64 tcg_res;
4313

    
4314
    if (is_double > 1) {
4315
        unallocated_encoding(s);
4316
        return;
4317
    }
4318

    
4319
    /* The imm8 encodes the sign bit, enough bits to represent
4320
     * an exponent in the range 01....1xx to 10....0xx,
4321
     * and the most significant 4 bits of the mantissa; see
4322
     * VFPExpandImm() in the v8 ARM ARM.
4323
     */
4324
    if (is_double) {
4325
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4326
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4327
            extract32(imm8, 0, 6);
4328
        imm <<= 48;
4329
    } else {
4330
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4331
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4332
            (extract32(imm8, 0, 6) << 3);
4333
        imm <<= 16;
4334
    }
4335

    
4336
    tcg_res = tcg_const_i64(imm);
4337
    write_fp_dreg(s, rd, tcg_res);
4338
    tcg_temp_free_i64(tcg_res);
4339
}
4340

    
4341
/* Handle floating point <=> fixed point conversions. Note that we can
4342
 * also deal with fp <=> integer conversions as a special case (scale == 64)
4343
 * OPTME: consider handling that special case specially or at least skipping
4344
 * the call to scalbn in the helpers for zero shifts.
4345
 */
4346
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4347
                           bool itof, int rmode, int scale, int sf, int type)
4348
{
4349
    bool is_signed = !(opcode & 1);
4350
    bool is_double = type;
4351
    TCGv_ptr tcg_fpstatus;
4352
    TCGv_i32 tcg_shift;
4353

    
4354
    tcg_fpstatus = get_fpstatus_ptr();
4355

    
4356
    tcg_shift = tcg_const_i32(64 - scale);
4357

    
4358
    if (itof) {
4359
        TCGv_i64 tcg_int = cpu_reg(s, rn);
4360
        if (!sf) {
4361
            TCGv_i64 tcg_extend = new_tmp_a64(s);
4362

    
4363
            if (is_signed) {
4364
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4365
            } else {
4366
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4367
            }
4368

    
4369
            tcg_int = tcg_extend;
4370
        }
4371

    
4372
        if (is_double) {
4373
            TCGv_i64 tcg_double = tcg_temp_new_i64();
4374
            if (is_signed) {
4375
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
4376
                                     tcg_shift, tcg_fpstatus);
4377
            } else {
4378
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
4379
                                     tcg_shift, tcg_fpstatus);
4380
            }
4381
            write_fp_dreg(s, rd, tcg_double);
4382
            tcg_temp_free_i64(tcg_double);
4383
        } else {
4384
            TCGv_i32 tcg_single = tcg_temp_new_i32();
4385
            if (is_signed) {
4386
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
4387
                                     tcg_shift, tcg_fpstatus);
4388
            } else {
4389
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
4390
                                     tcg_shift, tcg_fpstatus);
4391
            }
4392
            write_fp_sreg(s, rd, tcg_single);
4393
            tcg_temp_free_i32(tcg_single);
4394
        }
4395
    } else {
4396
        TCGv_i64 tcg_int = cpu_reg(s, rd);
4397
        TCGv_i32 tcg_rmode;
4398

    
4399
        if (extract32(opcode, 2, 1)) {
4400
            /* There are too many rounding modes to all fit into rmode,
4401
             * so FCVTA[US] is a special case.
4402
             */
4403
            rmode = FPROUNDING_TIEAWAY;
4404
        }
4405

    
4406
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4407

    
4408
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4409

    
4410
        if (is_double) {
4411
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4412
            if (is_signed) {
4413
                if (!sf) {
4414
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
4415
                                         tcg_shift, tcg_fpstatus);
4416
                } else {
4417
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
4418
                                         tcg_shift, tcg_fpstatus);
4419
                }
4420
            } else {
4421
                if (!sf) {
4422
                    gen_helper_vfp_tould(tcg_int, tcg_double,
4423
                                         tcg_shift, tcg_fpstatus);
4424
                } else {
4425
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
4426
                                         tcg_shift, tcg_fpstatus);
4427
                }
4428
            }
4429
            tcg_temp_free_i64(tcg_double);
4430
        } else {
4431
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4432
            if (sf) {
4433
                if (is_signed) {
4434
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
4435
                                         tcg_shift, tcg_fpstatus);
4436
                } else {
4437
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
4438
                                         tcg_shift, tcg_fpstatus);
4439
                }
4440
            } else {
4441
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
4442
                if (is_signed) {
4443
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
4444
                                         tcg_shift, tcg_fpstatus);
4445
                } else {
4446
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
4447
                                         tcg_shift, tcg_fpstatus);
4448
                }
4449
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4450
                tcg_temp_free_i32(tcg_dest);
4451
            }
4452
            tcg_temp_free_i32(tcg_single);
4453
        }
4454

    
4455
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4456
        tcg_temp_free_i32(tcg_rmode);
4457

    
4458
        if (!sf) {
4459
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
4460
        }
4461
    }
4462

    
4463
    tcg_temp_free_ptr(tcg_fpstatus);
4464
    tcg_temp_free_i32(tcg_shift);
4465
}
4466

    
4467
/* C3.6.29 Floating point <-> fixed point conversions
4468
 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4469
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4470
 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4471
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4472
 */
4473
static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4474
{
4475
    int rd = extract32(insn, 0, 5);
4476
    int rn = extract32(insn, 5, 5);
4477
    int scale = extract32(insn, 10, 6);
4478
    int opcode = extract32(insn, 16, 3);
4479
    int rmode = extract32(insn, 19, 2);
4480
    int type = extract32(insn, 22, 2);
4481
    bool sbit = extract32(insn, 29, 1);
4482
    bool sf = extract32(insn, 31, 1);
4483
    bool itof;
4484

    
4485
    if (sbit || (type > 1)
4486
        || (!sf && scale < 32)) {
4487
        unallocated_encoding(s);
4488
        return;
4489
    }
4490

    
4491
    switch ((rmode << 3) | opcode) {
4492
    case 0x2: /* SCVTF */
4493
    case 0x3: /* UCVTF */
4494
        itof = true;
4495
        break;
4496
    case 0x18: /* FCVTZS */
4497
    case 0x19: /* FCVTZU */
4498
        itof = false;
4499
        break;
4500
    default:
4501
        unallocated_encoding(s);
4502
        return;
4503
    }
4504

    
4505
    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
4506
}
4507

    
4508
static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4509
{
4510
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4511
     * without conversion.
4512
     */
4513

    
4514
    if (itof) {
4515
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
4516

    
4517
        switch (type) {
4518
        case 0:
4519
        {
4520
            /* 32 bit */
4521
            TCGv_i64 tmp = tcg_temp_new_i64();
4522
            tcg_gen_ext32u_i64(tmp, tcg_rn);
4523
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
4524
            tcg_gen_movi_i64(tmp, 0);
4525
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
4526
            tcg_temp_free_i64(tmp);
4527
            break;
4528
        }
4529
        case 1:
4530
        {
4531
            /* 64 bit */
4532
            TCGv_i64 tmp = tcg_const_i64(0);
4533
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
4534
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
4535
            tcg_temp_free_i64(tmp);
4536
            break;
4537
        }
4538
        case 2:
4539
            /* 64 bit to top half. */
4540
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
4541
            break;
4542
        }
4543
    } else {
4544
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
4545

    
4546
        switch (type) {
4547
        case 0:
4548
            /* 32 bit */
4549
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
4550
            break;
4551
        case 1:
4552
            /* 64 bit */
4553
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
4554
            break;
4555
        case 2:
4556
            /* 64 bits from top half */
4557
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
4558
            break;
4559
        }
4560
    }
4561
}
4562

    
4563
/* C3.6.30 Floating point <-> integer conversions
4564
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
4565
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4566
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
4567
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4568
 */
4569
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
4570
{
4571
    int rd = extract32(insn, 0, 5);
4572
    int rn = extract32(insn, 5, 5);
4573
    int opcode = extract32(insn, 16, 3);
4574
    int rmode = extract32(insn, 19, 2);
4575
    int type = extract32(insn, 22, 2);
4576
    bool sbit = extract32(insn, 29, 1);
4577
    bool sf = extract32(insn, 31, 1);
4578

    
4579
    if (sbit) {
4580
        unallocated_encoding(s);
4581
        return;
4582
    }
4583

    
4584
    if (opcode > 5) {
4585
        /* FMOV */
4586
        bool itof = opcode & 1;
4587

    
4588
        if (rmode >= 2) {
4589
            unallocated_encoding(s);
4590
            return;
4591
        }
4592

    
4593
        switch (sf << 3 | type << 1 | rmode) {
4594
        case 0x0: /* 32 bit */
4595
        case 0xa: /* 64 bit */
4596
        case 0xd: /* 64 bit to top half of quad */
4597
            break;
4598
        default:
4599
            /* all other sf/type/rmode combinations are invalid */
4600
            unallocated_encoding(s);
4601
            break;
4602
        }
4603

    
4604
        handle_fmov(s, rd, rn, type, itof);
4605
    } else {
4606
        /* actual FP conversions */
4607
        bool itof = extract32(opcode, 1, 1);
4608

    
4609
        if (type > 1 || (rmode != 0 && opcode > 1)) {
4610
            unallocated_encoding(s);
4611
            return;
4612
        }
4613

    
4614
        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
4615
    }
4616
}
4617

    
4618
/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
4619
 *   31  30  29 28     25 24                          0
4620
 * +---+---+---+---------+-----------------------------+
4621
 * |   | 0 |   | 1 1 1 1 |                             |
4622
 * +---+---+---+---------+-----------------------------+
4623
 */
4624
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
4625
{
4626
    if (extract32(insn, 24, 1)) {
4627
        /* Floating point data-processing (3 source) */
4628
        disas_fp_3src(s, insn);
4629
    } else if (extract32(insn, 21, 1) == 0) {
4630
        /* Floating point to fixed point conversions */
4631
        disas_fp_fixed_conv(s, insn);
4632
    } else {
4633
        switch (extract32(insn, 10, 2)) {
4634
        case 1:
4635
            /* Floating point conditional compare */
4636
            disas_fp_ccomp(s, insn);
4637
            break;
4638
        case 2:
4639
            /* Floating point data-processing (2 source) */
4640
            disas_fp_2src(s, insn);
4641
            break;
4642
        case 3:
4643
            /* Floating point conditional select */
4644
            disas_fp_csel(s, insn);
4645
            break;
4646
        case 0:
4647
            switch (ctz32(extract32(insn, 12, 4))) {
4648
            case 0: /* [15:12] == xxx1 */
4649
                /* Floating point immediate */
4650
                disas_fp_imm(s, insn);
4651
                break;
4652
            case 1: /* [15:12] == xx10 */
4653
                /* Floating point compare */
4654
                disas_fp_compare(s, insn);
4655
                break;
4656
            case 2: /* [15:12] == x100 */
4657
                /* Floating point data-processing (1 source) */
4658
                disas_fp_1src(s, insn);
4659
                break;
4660
            case 3: /* [15:12] == 1000 */
4661
                unallocated_encoding(s);
4662
                break;
4663
            default: /* [15:12] == 0000 */
4664
                /* Floating point <-> integer conversions */
4665
                disas_fp_int_conv(s, insn);
4666
                break;
4667
            }
4668
            break;
4669
        }
4670
    }
4671
}
4672

    
4673
static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
                     int pos)
{
    /* Extract 64 bits from the middle of the 128-bit concatenation
     * left:right, starting 'pos' bits up from the bottom of the right
     * (least significant) half. The result replaces tcg_right; tcg_left
     * is guaranteed to be left untouched.
     */
    TCGv_i64 tcg_hi = tcg_temp_new_i64();

    assert(pos > 0 && pos < 64);

    /* low bits of result: right half shifted down by pos */
    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
    /* high bits of result: bottom pos bits of the left half */
    tcg_gen_shli_i64(tcg_hi, tcg_left, 64 - pos);
    tcg_gen_or_i64(tcg_right, tcg_right, tcg_hi);

    tcg_temp_free_i64(tcg_hi);
}
4691

    
4692
/* C3.6.1 EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    /* imm4 is the extraction start position in bytes; convert to bits */
    int pos = imm4 << 3;
    TCGv_i64 tcg_resl, tcg_resh;

    /* op2 must be zero; for the 64-bit variant imm4<3> must be zero
     * (the start position must lie within the 64-bit source).
     */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        /* 64-bit variant: result low half from Vm:Vn, high half zeroed */
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
        tcg_gen_movi_i64(tcg_resh, 0);
    } else {
        TCGv_i64 tcg_hh;
        /* Walk the four 64-bit slices of Vm:Vn in significance order */
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        /* A start position of 64 or more just skips the first slice */
        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* Shift both result halves down, pulling in bits from the
             * next-higher slice each time.
             */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
4760

    
4761
/* C3.6.2 TBL/TBX
 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 */
static void disas_simd_tb(DisasContext *s, uint32_t insn)
{
    int op2 = extract32(insn, 22, 2);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int is_tblx = extract32(insn, 12, 1);  /* op: 0 = TBL, 1 = TBX */
    int len = extract32(insn, 13, 2);      /* table uses len+1 registers */
    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
    TCGv_i32 tcg_regno, tcg_numregs;

    /* op2 must be zero for all allocated encodings */
    if (op2 != 0) {
        unallocated_encoding(s);
        return;
    }

    /* This does a table lookup: for every byte element in the input
     * we index into a table formed from up to four vector registers,
     * and then the output is the result of the lookups. Our helper
     * function does the lookup operation for a single 64 bit part of
     * the input.
     */
    tcg_resl = tcg_temp_new_i64();
    tcg_resh = tcg_temp_new_i64();

    /* TBX leaves out-of-range lanes unchanged, so it starts from the
     * current contents of Rd; TBL starts from zero. Note that Rd must
     * be read here, before any part of the result is written back.
     */
    if (is_tblx) {
        read_vec_element(s, tcg_resl, rd, 0, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_resl, 0);
    }
    if (is_tblx && is_q) {
        read_vec_element(s, tcg_resh, rd, 1, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_resh, 0);
    }

    /* Do the lookup one 64-bit chunk of indices at a time */
    tcg_idx = tcg_temp_new_i64();
    tcg_regno = tcg_const_i32(rn);
    tcg_numregs = tcg_const_i32(len + 1);
    read_vec_element(s, tcg_idx, rm, 0, MO_64);
    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
                        tcg_regno, tcg_numregs);
    if (is_q) {
        read_vec_element(s, tcg_idx, rm, 1, MO_64);
        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
                            tcg_regno, tcg_numregs);
    }
    tcg_temp_free_i64(tcg_idx);
    tcg_temp_free_i32(tcg_regno);
    tcg_temp_free_i32(tcg_numregs);

    /* For the 64-bit variant tcg_resh is still zero, clearing Vd[127:64] */
    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
4824

    
4825
/* C3.6.3 ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+------------------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+------------------+------+
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    /* opcode 0 is unallocated; 64-bit elements require the Q form */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    /* Accumulate the result in a local pair so that Rd == Rn/Rm works */
    tcg_resl = tcg_const_i64(0);
    tcg_resh = tcg_const_i64(0);
    tcg_res = tcg_temp_new_i64();

    /* For each destination lane, pick the source register and lane
     * according to the permute being performed.
     */
    for (i = 0; i < elements; i++) {
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* De-interleave: even (UZP1) or odd (UZP2) lanes of Rn
             * then Rm, concatenated.
             */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* Transpose: lane pairs swap between Rn and Rm */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* Interleave the lower (ZIP1) or upper (ZIP2) halves */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* OR the selected element into the right half of the result */
        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    /* For the 64-bit variant tcg_resh stays zero, clearing Vd[127:64] */
    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
4909

    
4910
static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
                        int opc, bool is_min, TCGv_ptr fpst)
{
    /* Helper for disas_simd_across_lanes: single precision min/max of
     * the two inputs, with the result left in tcg_elt1. opc 0xc selects
     * the minNum/maxNum variants, opc 0xf the plain min/max variants.
     */
    if (is_min) {
        if (opc == 0xc) {
            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            assert(opc == 0xf);
            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    } else {
        if (opc == 0xc) {
            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            assert(opc == 0xf);
            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    }
}
4932

    
4933
/* C3.6.4 AdvSIMD across lanes
4934
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
4935
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
4936
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
4937
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
4938
 */
4939
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
4940
{
4941
    int rd = extract32(insn, 0, 5);
4942
    int rn = extract32(insn, 5, 5);
4943
    int size = extract32(insn, 22, 2);
4944
    int opcode = extract32(insn, 12, 5);
4945
    bool is_q = extract32(insn, 30, 1);
4946
    bool is_u = extract32(insn, 29, 1);
4947
    bool is_fp = false;
4948
    bool is_min = false;
4949
    int esize;
4950
    int elements;
4951
    int i;
4952
    TCGv_i64 tcg_res, tcg_elt;
4953

    
4954
    switch (opcode) {
4955
    case 0x1b: /* ADDV */
4956
        if (is_u) {
4957
            unallocated_encoding(s);
4958
            return;
4959
        }
4960
        /* fall through */
4961
    case 0x3: /* SADDLV, UADDLV */
4962
    case 0xa: /* SMAXV, UMAXV */
4963
    case 0x1a: /* SMINV, UMINV */
4964
        if (size == 3 || (size == 2 && !is_q)) {
4965
            unallocated_encoding(s);
4966
            return;
4967
        }
4968
        break;
4969
    case 0xc: /* FMAXNMV, FMINNMV */
4970
    case 0xf: /* FMAXV, FMINV */
4971
        if (!is_u || !is_q || extract32(size, 0, 1)) {
4972
            unallocated_encoding(s);
4973
            return;
4974
        }
4975
        /* Bit 1 of size field encodes min vs max, and actual size is always
4976
         * 32 bits: adjust the size variable so following code can rely on it
4977
         */
4978
        is_min = extract32(size, 1, 1);
4979
        is_fp = true;
4980
        size = 2;
4981
        break;
4982
    default:
4983
        unallocated_encoding(s);
4984
        return;
4985
    }
4986

    
4987
    esize = 8 << size;
4988
    elements = (is_q ? 128 : 64) / esize;
4989

    
4990
    tcg_res = tcg_temp_new_i64();
4991
    tcg_elt = tcg_temp_new_i64();
4992

    
4993
    /* These instructions operate across all lanes of a vector
4994
     * to produce a single result. We can guarantee that a 64
4995
     * bit intermediate is sufficient:
4996
     *  + for [US]ADDLV the maximum element size is 32 bits, and
4997
     *    the result type is 64 bits
4998
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
4999
     *    same as the element size, which is 32 bits at most
5000
     * For the integer operations we can choose to work at 64
5001
     * or 32 bits and truncate at the end; for simplicity
5002
     * we use 64 bits always. The floating point
5003
     * ops do require 32 bit intermediates, though.
5004
     */
5005
    if (!is_fp) {
5006
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5007

    
5008
        for (i = 1; i < elements; i++) {
5009
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5010

    
5011
            switch (opcode) {
5012
            case 0x03: /* SADDLV / UADDLV */
5013
            case 0x1b: /* ADDV */
5014
                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5015
                break;
5016
            case 0x0a: /* SMAXV / UMAXV */
5017
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5018
                                    tcg_res,
5019
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5020
                break;
5021
            case 0x1a: /* SMINV / UMINV */
5022
                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5023
                                    tcg_res,
5024
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5025
                break;
5026
                break;
5027
            default:
5028
                g_assert_not_reached();
5029
            }
5030

    
5031
        }
5032
    } else {
5033
        /* Floating point ops which work on 32 bit (single) intermediates.
5034
         * Note that correct NaN propagation requires that we do these
5035
         * operations in exactly the order specified by the pseudocode.
5036
         */
5037
        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5038
        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5039
        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5040
        TCGv_ptr fpst = get_fpstatus_ptr();
5041

    
5042
        assert(esize == 32);
5043
        assert(elements == 4);
5044

    
5045
        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5046
        tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt);
5047
        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5048
        tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5049

    
5050
        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5051

    
5052
        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5053
        tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5054
        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5055
        tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt);
5056

    
5057
        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5058

    
5059
        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5060

    
5061
        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5062
        tcg_temp_free_i32(tcg_elt1);
5063
        tcg_temp_free_i32(tcg_elt2);
5064
        tcg_temp_free_i32(tcg_elt3);
5065
        tcg_temp_free_ptr(fpst);
5066
    }
5067

    
5068
    tcg_temp_free_i64(tcg_elt);
5069

    
5070
    /* Now truncate the result to the width required for the final output */
5071
    if (opcode == 0x03) {
5072
        /* SADDLV, UADDLV: result is 2*esize */
5073
        size++;
5074
    }
5075

    
5076
    switch (size) {
5077
    case 0:
5078
        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5079
        break;
5080
    case 1:
5081
        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5082
        break;
5083
    case 2:
5084
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5085
        break;
5086
    case 3:
5087
        break;
5088
    default:
5089
        g_assert_not_reached();
5090
    }
5091

    
5092
    write_fp_dreg(s, rd, tcg_res);
5093
    tcg_temp_free_i64(tcg_res);
5094
}
5095

    
5096
/* C6.3.31 DUP (Element, Vector)
5097
 *
5098
 *  31  30   29              21 20    16 15        10  9    5 4    0
5099
 * +---+---+-------------------+--------+-------------+------+------+
5100
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5101
 * +---+---+-------------------+--------+-------------+------+------+
5102
 *
5103
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5104
 */
5105
static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5106
                             int imm5)
5107
{
5108
    int size = ctz32(imm5);
5109
    int esize = 8 << size;
5110
    int elements = (is_q ? 128 : 64) / esize;
5111
    int index, i;
5112
    TCGv_i64 tmp;
5113

    
5114
    if (size > 3 || (size == 3 && !is_q)) {
5115
        unallocated_encoding(s);
5116
        return;
5117
    }
5118

    
5119
    index = imm5 >> (size + 1);
5120

    
5121
    tmp = tcg_temp_new_i64();
5122
    read_vec_element(s, tmp, rn, index, size);
5123

    
5124
    for (i = 0; i < elements; i++) {
5125
        write_vec_element(s, tmp, rd, i, size);
5126
    }
5127

    
5128
    if (!is_q) {
5129
        clear_vec_high(s, rd);
5130
    }
5131

    
5132
    tcg_temp_free_i64(tmp);
5133
}
5134

    
5135
/* C6.3.31 DUP (element, scalar)
5136
 *  31                   21 20    16 15        10  9    5 4    0
5137
 * +-----------------------+--------+-------------+------+------+
5138
 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5139
 * +-----------------------+--------+-------------+------+------+
5140
 */
5141
static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5142
                              int imm5)
5143
{
5144
    int size = ctz32(imm5);
5145
    int index;
5146
    TCGv_i64 tmp;
5147

    
5148
    if (size > 3) {
5149
        unallocated_encoding(s);
5150
        return;
5151
    }
5152

    
5153
    index = imm5 >> (size + 1);
5154

    
5155
    /* This instruction just extracts the specified element and
5156
     * zero-extends it into the bottom of the destination register.
5157
     */
5158
    tmp = tcg_temp_new_i64();
5159
    read_vec_element(s, tmp, rn, index, size);
5160
    write_fp_dreg(s, rd, tmp);
5161
    tcg_temp_free_i64(tmp);
5162
}
5163

    
5164
/* C6.3.32 DUP (General)
5165
 *
5166
 *  31  30   29              21 20    16 15        10  9    5 4    0
5167
 * +---+---+-------------------+--------+-------------+------+------+
5168
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5169
 * +---+---+-------------------+--------+-------------+------+------+
5170
 *
5171
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5172
 */
5173
static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5174
                             int imm5)
5175
{
5176
    int size = ctz32(imm5);
5177
    int esize = 8 << size;
5178
    int elements = (is_q ? 128 : 64)/esize;
5179
    int i = 0;
5180

    
5181
    if (size > 3 || ((size == 3) && !is_q)) {
5182
        unallocated_encoding(s);
5183
        return;
5184
    }
5185
    for (i = 0; i < elements; i++) {
5186
        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5187
    }
5188
    if (!is_q) {
5189
        clear_vec_high(s, rd);
5190
    }
5191
}
5192

    
5193
/* C6.3.150 INS (Element)
5194
 *
5195
 *  31                   21 20    16 15  14    11  10 9    5 4    0
5196
 * +-----------------------+--------+------------+---+------+------+
5197
 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5198
 * +-----------------------+--------+------------+---+------+------+
5199
 *
5200
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5201
 * index: encoded in imm5<4:size+1>
5202
 */
5203
static void handle_simd_inse(DisasContext *s, int rd, int rn,
5204
                             int imm4, int imm5)
5205
{
5206
    int size = ctz32(imm5);
5207
    int src_index, dst_index;
5208
    TCGv_i64 tmp;
5209

    
5210
    if (size > 3) {
5211
        unallocated_encoding(s);
5212
        return;
5213
    }
5214
    dst_index = extract32(imm5, 1+size, 5);
5215
    src_index = extract32(imm4, size, 4);
5216

    
5217
    tmp = tcg_temp_new_i64();
5218

    
5219
    read_vec_element(s, tmp, rn, src_index, size);
5220
    write_vec_element(s, tmp, rd, dst_index, size);
5221

    
5222
    tcg_temp_free_i64(tmp);
5223
}
5224

    
5225

    
5226
/* C6.3.151 INS (General)
5227
 *
5228
 *  31                   21 20    16 15        10  9    5 4    0
5229
 * +-----------------------+--------+-------------+------+------+
5230
 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5231
 * +-----------------------+--------+-------------+------+------+
5232
 *
5233
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5234
 * index: encoded in imm5<4:size+1>
5235
 */
5236
static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5237
{
5238
    int size = ctz32(imm5);
5239
    int idx;
5240

    
5241
    if (size > 3) {
5242
        unallocated_encoding(s);
5243
        return;
5244
    }
5245

    
5246
    idx = extract32(imm5, 1 + size, 4 - size);
5247
    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5248
}
5249

    
5250
/*
 * C6.3.321 UMOV (General)
 * C6.3.237 SMOV (General)
 *
 *  31  30   29              21 20    16 15    12   10 9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * U: unsigned when set
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
{
    int size = ctz32(imm5);
    int element;
    TCGv_i64 tcg_rd;

    /* Check for UnallocatedEncodings */
    if (is_signed) {
        /* SMOV: byte/half/word sources only; word requires the X form */
        if (size > 2 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* UMOV: W form (Q=0) for sizes 0-2, X form (Q=1) only for size 3 */
        if (size > 3
            || (size < 3 && is_q)
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    }
    /* Lane index is encoded in imm5 above the size marker bit */
    element = extract32(imm5, 1+size, 4);

    tcg_rd = cpu_reg(s, rd);
    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
    if (is_signed && !is_q) {
        /* SMOV Wd: sign-extend within 32 bits, then zero the top half
         * as a 32-bit register write would.
         */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
5291

    
5292
/* C3.6.5 AdvSIMD copy
5293
 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5294
 * +---+---+----+-----------------+------+---+------+---+------+------+
5295
 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5296
 * +---+---+----+-----------------+------+---+------+---+------+------+
5297
 */
5298
static void disas_simd_copy(DisasContext *s, uint32_t insn)
5299
{
5300
    int rd = extract32(insn, 0, 5);
5301
    int rn = extract32(insn, 5, 5);
5302
    int imm4 = extract32(insn, 11, 4);
5303
    int op = extract32(insn, 29, 1);
5304
    int is_q = extract32(insn, 30, 1);
5305
    int imm5 = extract32(insn, 16, 5);
5306

    
5307
    if (op) {
5308
        if (is_q) {
5309
            /* INS (element) */
5310
            handle_simd_inse(s, rd, rn, imm4, imm5);
5311
        } else {
5312
            unallocated_encoding(s);
5313
        }
5314
    } else {
5315
        switch (imm4) {
5316
        case 0:
5317
            /* DUP (element - vector) */
5318
            handle_simd_dupe(s, is_q, rd, rn, imm5);
5319
            break;
5320
        case 1:
5321
            /* DUP (general) */
5322
            handle_simd_dupg(s, is_q, rd, rn, imm5);
5323
            break;
5324
        case 3:
5325
            if (is_q) {
5326
                /* INS (general) */
5327
                handle_simd_insg(s, rd, rn, imm5);
5328
            } else {
5329
                unallocated_encoding(s);
5330
            }
5331
            break;
5332
        case 5:
5333
        case 7:
5334
            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5335
            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5336
            break;
5337
        default:
5338
            unallocated_encoding(s);
5339
            break;
5340
        }
5341
    }
5342
}
5343

    
5344
/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    /* 8-bit immediate assembled from the abc and defgh fields */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    /* o2 must be zero; op=1/cmode=1111 is only valid in the Q form */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        /* 32-bit lanes: imm8 at byte position cmode<3:1> */
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        /* 16-bit lanes: imm8 in the low or high byte */
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        /* "shifting ones" forms: low bits filled with ones */
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            /* per-byte replicate of imm8 */
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            /* each bit of imm8 expands to a whole byte of ones/zeros */
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            /* FMOV (vector, immediate): imm8 expands to an FP constant */
            if (is_neg) {
                /* double precision: a:~b:bbbbbb:cdefgh:Zeros(48) */
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                /* single precision: a:~b:bbbbb:cdefgh:Zeros(19),
                 * replicated into both 32-bit halves
                 */
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }

    /* op bit inverts the immediate (MVNI/BIC), except for cmode 111x */
    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(imm);
    tcg_rd = new_tmp_a64(s);

    /* Apply the expanded immediate to each 64-bit half of Vd */
    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            /* these cmode patterns are ORR/BIC: combine with old value */
            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_imm);
}
5472

    
5473
/* C3.6.7 AdvSIMD scalar copy
5474
 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
5475
 * +-----+----+-----------------+------+---+------+---+------+------+
5476
 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5477
 * +-----+----+-----------------+------+---+------+---+------+------+
5478
 */
5479
static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5480
{
5481
    int rd = extract32(insn, 0, 5);
5482
    int rn = extract32(insn, 5, 5);
5483
    int imm4 = extract32(insn, 11, 4);
5484
    int imm5 = extract32(insn, 16, 5);
5485
    int op = extract32(insn, 29, 1);
5486

    
5487
    if (op != 0 || imm4 != 0) {
5488
        unallocated_encoding(s);
5489
        return;
5490
    }
5491

    
5492
    /* DUP (element, scalar) */
5493
    handle_simd_dupes(s, rd, rn, imm5);
5494
}
5495

    
5496
/* C3.6.8 AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    /* Placeholder: nothing in this group is implemented yet, so report
     * every encoding as unsupported rather than decoding further.
     */
    unsupported_encoding(s, insn);
}
5506

    
5507
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This function handles the common shift-right code and is used by both
 * the vector and scalar code.
 *
 * tcg_res:    destination; when accumulate is set its old value is the
 *             accumulator input
 * tcg_src:    value to shift; used (and clobbered) as scratch space
 * tcg_rnd:    rounding addend, or TCGV_UNUSED_I64 for a plain shift
 * accumulate: add the shifted result to tcg_res instead of replacing it
 * is_u:       true for the unsigned (logical shift) variants
 * size:       element size (3 == 64 bit)
 * shift:      right-shift amount (may be the full element width, 64)
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    /* An unused tcg_rnd is the caller's signal that no rounding is wanted */
    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        /* Adding the 64-bit rounding constant can carry out of 64 bits,
         * so track the overflow in a separate high word (tcg_src_hi).
         */
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_zero);
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter (shift == 64) */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            /* combine low bits shifted down with the carried-out high bits */
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    /* Either accumulate into, or replace, the destination */
    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}
5594

    
5595
/* Common SHL/SLI - Shift left with an optional insert */
5596
static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
5597
                                 bool insert, int shift)
5598
{
5599
    if (insert) { /* SLI */
5600
        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
5601
    } else { /* SHL */
5602
        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
5603
    }
5604
}
5605

    
5606
/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    /* The scalar forms in this group always operate on a 64-bit element */
    const int size = 3;
    int immhb = immh << 3 | immb;
    /* Right shifts encode the amount as (2 * esize) - immhb */
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    /* Only immh<3> == 1 (64-bit element) encodings are allocated here */
    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* opcode 0x00 (SSHR/USHR) takes neither branch: plain shift */
    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    }

    /* An unused tcg_round tells handle_shri_with_rndacc "no rounding" */
    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    tcg_rn = read_fp_dreg(s, rn);
    /* Accumulating forms also read the old destination value */
    tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                               accumulate, is_u, size, shift);

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
}
5658

    
5659
/* SHL/SLI - Scalar shift left */
5660
static void handle_scalar_simd_shli(DisasContext *s, bool insert,
5661
                                    int immh, int immb, int opcode,
5662
                                    int rn, int rd)
5663
{
5664
    int size = 32 - clz32(immh) - 1;
5665
    int immhb = immh << 3 | immb;
5666
    int shift = immhb - (8 << size);
5667
    TCGv_i64 tcg_rn = new_tmp_a64(s);
5668
    TCGv_i64 tcg_rd = new_tmp_a64(s);
5669

    
5670
    if (!extract32(immh, 3, 1)) {
5671
        unallocated_encoding(s);
5672
        return;
5673
    }
5674

    
5675
    tcg_rn = read_fp_dreg(s, rn);
5676
    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
5677

    
5678
    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
5679

    
5680
    write_fp_dreg(s, rd, tcg_rd);
5681

    
5682
    tcg_temp_free_i64(tcg_rn);
5683
    tcg_temp_free_i64(tcg_rd);
5684
}
5685

    
5686
/* C3.6.9 AdvSIMD scalar shift by immediate
5687
 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
5688
 * +-----+---+-------------+------+------+--------+---+------+------+
5689
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
5690
 * +-----+---+-------------+------+------+--------+---+------+------+
5691
 *
5692
 * This is the scalar version so it works on a fixed sized registers
5693
 */
5694
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
5695
{
5696
    int rd = extract32(insn, 0, 5);
5697
    int rn = extract32(insn, 5, 5);
5698
    int opcode = extract32(insn, 11, 5);
5699
    int immb = extract32(insn, 16, 3);
5700
    int immh = extract32(insn, 19, 4);
5701
    bool is_u = extract32(insn, 29, 1);
5702

    
5703
    switch (opcode) {
5704
    case 0x00: /* SSHR / USHR */
5705
    case 0x02: /* SSRA / USRA */
5706
    case 0x04: /* SRSHR / URSHR */
5707
    case 0x06: /* SRSRA / URSRA */
5708
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
5709
        break;
5710
    case 0x0a: /* SHL / SLI */
5711
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
5712
        break;
5713
    default:
5714
        unsupported_encoding(s, insn);
5715
        break;
5716
    }
5717
}
5718

    
5719
/* C3.6.10 AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Placeholder: nothing in this group is implemented yet, so report
     * every encoding as unsupported rather than decoding further.
     */
    unsupported_encoding(s, insn);
}
5729

    
5730
static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    /* Handle 64x64->64 opcodes which are shared between the scalar
     * and vector 3-same groups. We cover every opcode where size == 3
     * is valid in either the three-reg-same (integer, not pairwise)
     * or scalar-three-reg-same groups. (Some opcodes are not yet
     * implemented.)
     *
     * u selects the unsigned/alternate form of each opcode;
     * tcg_rd = tcg_rn <op> tcg_rm.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        /* setcond yields 0/1; negating turns 1 into all-ones */
        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        /* CMTST : test is "if (X & Y != 0)". */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        if (u) {
            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        /* Callers must only pass opcodes listed above */
        g_assert_not_reached();
    }
}
5817

    
5818
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * size:     non-zero for double-precision elements, zero for single
 * elements: number of elements to process (1 for the scalar forms)
 * fpopcode: opcode combined with U and size[1] by the caller
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    /* One fpstatus pointer is shared by every pass */
    TCGv_ptr fpst = get_fpstatus_ptr();

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* implemented as fabs(op1 - op2) */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* implemented as fabs(op1 - op2) */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                /* zero-extend the 32-bit result over the 64-bit element */
                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* elements << size counts 32-bit units written; fewer than 4 means
     * the op did not write a full 128 bits, so zero the high half.
     */
    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}
5942

    
5943
/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1c: /* FCMEQ */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5c: /* FCMGE */
        case 0x5d: /* FACGE */
        case 0x7c: /* FCMGT */
        case 0x7d: /* FACGT */
            unsupported_encoding(s, insn);
            return;
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        /* size[0] selects single vs double; elements == 1 (scalar) */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: filter out unimplemented/unallocated encodings */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
        unsupported_encoding(s, insn);
        return;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        unsupported_encoding(s, insn);
        return;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_fp_dreg(s, rn);       /* op1 */
    tcg_rm = read_fp_dreg(s, rm);       /* op2 */
    tcg_rd = tcg_temp_new_i64();

    /* For the moment we only support the opcodes which are
     * 64-bit-width only. The size != 3 cases will
     * be handled later when the relevant ops are implemented.
     */
    handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rm);
    tcg_temp_free_i64(tcg_rd);
}
6034

    
6035
/* C3.6.12 AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    /* Placeholder: nothing in this group is implemented yet, so report
     * every encoding as unsupported rather than decoding further.
     */
    unsupported_encoding(s, insn);
}
6045

    
6046
/* C3.6.13 AdvSIMD scalar x indexed element
 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 */
static void disas_simd_scalar_indexed(DisasContext *s, uint32_t insn)
{
    /* Placeholder: nothing in this group is implemented yet, so report
     * every encoding as unsupported rather than decoding further.
     */
    unsupported_encoding(s, insn);
}
6056

    
6057
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
6058
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
6059
                                 int immh, int immb, int opcode, int rn, int rd)
6060
{
6061
    int size = 32 - clz32(immh) - 1;
6062
    int immhb = immh << 3 | immb;
6063
    int shift = 2 * (8 << size) - immhb;
6064
    bool accumulate = false;
6065
    bool round = false;
6066
    int dsize = is_q ? 128 : 64;
6067
    int esize = 8 << size;
6068
    int elements = dsize/esize;
6069
    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
6070
    TCGv_i64 tcg_rn = new_tmp_a64(s);
6071
    TCGv_i64 tcg_rd = new_tmp_a64(s);
6072
    TCGv_i64 tcg_round;
6073
    int i;
6074

    
6075
    if (extract32(immh, 3, 1) && !is_q) {
6076
        unallocated_encoding(s);
6077
        return;
6078
    }
6079

    
6080
    if (size > 3 && !is_q) {
6081
        unallocated_encoding(s);
6082
        return;
6083
    }
6084

    
6085
    switch (opcode) {
6086
    case 0x02: /* SSRA / USRA (accumulate) */
6087
        accumulate = true;
6088
        break;
6089
    case 0x04: /* SRSHR / URSHR (rounding) */
6090
        round = true;
6091
        break;
6092
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6093
        accumulate = round = true;
6094
        break;
6095
    }
6096

    
6097
    if (round) {
6098
        uint64_t round_const = 1ULL << (shift - 1);
6099
        tcg_round = tcg_const_i64(round_const);
6100
    } else {
6101
        TCGV_UNUSED_I64(tcg_round);
6102
    }
6103

    
6104
    for (i = 0; i < elements; i++) {
6105
        read_vec_element(s, tcg_rn, rn, i, memop);
6106
        if (accumulate) {
6107
            read_vec_element(s, tcg_rd, rd, i, memop);
6108
        }
6109

    
6110
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6111
                                accumulate, is_u, size, shift);
6112

    
6113
        write_vec_element(s, tcg_rd, rd, i, size);
6114
    }
6115

    
6116
    if (!is_q) {
6117
        clear_vec_high(s, rd);
6118
    }
6119

    
6120
    if (round) {
6121
        tcg_temp_free_i64(tcg_round);
6122
    }
6123
}
6124

    
6125
/* SHL/SLI - Vector shift left */
6126
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
6127
                                int immh, int immb, int opcode, int rn, int rd)
6128
{
6129
    int size = 32 - clz32(immh) - 1;
6130
    int immhb = immh << 3 | immb;
6131
    int shift = immhb - (8 << size);
6132
    int dsize = is_q ? 128 : 64;
6133
    int esize = 8 << size;
6134
    int elements = dsize/esize;
6135
    TCGv_i64 tcg_rn = new_tmp_a64(s);
6136
    TCGv_i64 tcg_rd = new_tmp_a64(s);
6137
    int i;
6138

    
6139
    if (extract32(immh, 3, 1) && !is_q) {
6140
        unallocated_encoding(s);
6141
        return;
6142
    }
6143

    
6144
    if (size > 3 && !is_q) {
6145
        unallocated_encoding(s);
6146
        return;
6147
    }
6148

    
6149
    for (i = 0; i < elements; i++) {
6150
        read_vec_element(s, tcg_rn, rn, i, size);
6151
        if (insert) {
6152
            read_vec_element(s, tcg_rd, rd, i, size);
6153
        }
6154

    
6155
        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6156

    
6157
        write_vec_element(s, tcg_rd, rd, i, size);
6158
    }
6159

    
6160
    if (!is_q) {
6161
        clear_vec_high(s, rd);
6162
    }
6163
}
6164

    
6165
/* USHLL/SHLL - Vector shift left with widening */
6166
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
6167
                                 int immh, int immb, int opcode, int rn, int rd)
6168
{
6169
    int size = 32 - clz32(immh) - 1;
6170
    int immhb = immh << 3 | immb;
6171
    int shift = immhb - (8 << size);
6172
    int dsize = 64;
6173
    int esize = 8 << size;
6174
    int elements = dsize/esize;
6175
    TCGv_i64 tcg_rn = new_tmp_a64(s);
6176
    TCGv_i64 tcg_rd = new_tmp_a64(s);
6177
    int i;
6178

    
6179
    if (size >= 3) {
6180
        unallocated_encoding(s);
6181
        return;
6182
    }
6183

    
6184
    /* For the LL variants the store is larger than the load,
6185
     * so if rd == rn we would overwrite parts of our input.
6186
     * So load everything right now and use shifts in the main loop.
6187
     */
6188
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
6189

    
6190
    for (i = 0; i < elements; i++) {
6191
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
6192
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
6193
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
6194
        write_vec_element(s, tcg_rd, rd, i, size + 1);
6195
    }
6196
}
6197

    
6198

    
6199
/* C3.6.14 AdvSIMD shift by immediate
6200
 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
6201
 * +---+---+---+-------------+------+------+--------+---+------+------+
6202
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6203
 * +---+---+---+-------------+------+------+--------+---+------+------+
6204
 */
6205
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
6206
{
6207
    int rd = extract32(insn, 0, 5);
6208
    int rn = extract32(insn, 5, 5);
6209
    int opcode = extract32(insn, 11, 5);
6210
    int immb = extract32(insn, 16, 3);
6211
    int immh = extract32(insn, 19, 4);
6212
    bool is_u = extract32(insn, 29, 1);
6213
    bool is_q = extract32(insn, 30, 1);
6214

    
6215
    switch (opcode) {
6216
    case 0x00: /* SSHR / USHR */
6217
    case 0x02: /* SSRA / USRA (accumulate) */
6218
    case 0x04: /* SRSHR / URSHR (rounding) */
6219
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6220
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
6221
        break;
6222
    case 0x0a: /* SHL / SLI */
6223
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6224
        break;
6225
    case 0x14: /* SSHLL / USHLL */
6226
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6227
        break;
6228
    default:
6229
        /* We don't currently implement any of the Narrow or saturating shifts;
6230
         * nor do we implement the fixed-point conversions in this
6231
         * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
6232
         */
6233
        unsupported_encoding(s, insn);
6234
        return;
6235
    }
6236
}
6237

    
6238
/* Handle the 3-reg-different widening instruction classes:
 * 64 x 64 -> 128 ops (SABAL/SABDL/UABAL/UABDL, SMLAL/UMLAL,
 * SMLSL/UMLSL, SMULL/UMULL and their "2" top-half forms).
 * Each pass computes one 64-bit result element; opcodes 5/8/9
 * accumulate into Vd, 10/11 subtract from Vd, the rest overwrite it.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    /* Accumulating ops need the current destination value loaded first. */
    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* The "2" forms (is_q set) read the high half of the inputs. */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                /* Absolute difference: compute both a-b and b-a and
                 * select the non-negative one via movcond.
                 */
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }

            if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                tcg_temp_free_i64(tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            /* Accumulation here happens on pairs of packed lanes, so use
             * the addl/subl helpers rather than plain 64-bit add/sub.
             */
            if (accop > 0) {
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            } else if (accop < 0) {
                if (size == 0) {
                    gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                } else {
                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
/* C3.6.15 AdvSIMD three different
6414
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6415
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6416
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6417
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6418
 */
6419
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
6420
{
6421
    /* Instructions in this group fall into three basic classes
6422
     * (in each case with the operation working on each element in
6423
     * the input vectors):
6424
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
6425
     *     128 bit input)
6426
     * (2) wide 64 x 128 -> 128
6427
     * (3) narrowing 128 x 128 -> 64
6428
     * Here we do initial decode, catch unallocated cases and
6429
     * dispatch to separate functions for each class.
6430
     */
6431
    int is_q = extract32(insn, 30, 1);
6432
    int is_u = extract32(insn, 29, 1);
6433
    int size = extract32(insn, 22, 2);
6434
    int opcode = extract32(insn, 12, 4);
6435
    int rm = extract32(insn, 16, 5);
6436
    int rn = extract32(insn, 5, 5);
6437
    int rd = extract32(insn, 0, 5);
6438

    
6439
    switch (opcode) {
6440
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
6441
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
6442
        /* 64 x 128 -> 128 */
6443
        unsupported_encoding(s, insn);
6444
        break;
6445
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
6446
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
6447
        /* 128 x 128 -> 64 */
6448
        unsupported_encoding(s, insn);
6449
        break;
6450
    case 9:
6451
    case 11:
6452
    case 13:
6453
    case 14:
6454
        if (is_u) {
6455
            unallocated_encoding(s);
6456
            return;
6457
        }
6458
        /* fall through */
6459
    case 0:
6460
    case 2:
6461
        unsupported_encoding(s, insn);
6462
        break;
6463
    case 5:
6464
    case 7:
6465
    case 8:
6466
    case 10:
6467
    case 12:
6468
        /* 64 x 64 -> 128 */
6469
        if (size == 3) {
6470
            unallocated_encoding(s);
6471
            return;
6472
        }
6473
        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
6474
        break;
6475
    default:
6476
        /* opcode 15 not allocated */
6477
        unallocated_encoding(s);
6478
        break;
6479
    }
6480
}
6481

    
6482
/* Logic op (opcode == 3) subgroup of C3.6.16. */
6483
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
6484
{
6485
    int rd = extract32(insn, 0, 5);
6486
    int rn = extract32(insn, 5, 5);
6487
    int rm = extract32(insn, 16, 5);
6488
    int size = extract32(insn, 22, 2);
6489
    bool is_u = extract32(insn, 29, 1);
6490
    bool is_q = extract32(insn, 30, 1);
6491
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6492
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6493
    TCGv_i64 tcg_res[2];
6494
    int pass;
6495

    
6496
    tcg_res[0] = tcg_temp_new_i64();
6497
    tcg_res[1] = tcg_temp_new_i64();
6498

    
6499
    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
6500
        read_vec_element(s, tcg_op1, rn, pass, MO_64);
6501
        read_vec_element(s, tcg_op2, rm, pass, MO_64);
6502

    
6503
        if (!is_u) {
6504
            switch (size) {
6505
            case 0: /* AND */
6506
                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
6507
                break;
6508
            case 1: /* BIC */
6509
                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
6510
                break;
6511
            case 2: /* ORR */
6512
                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
6513
                break;
6514
            case 3: /* ORN */
6515
                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
6516
                break;
6517
            }
6518
        } else {
6519
            if (size != 0) {
6520
                /* B* ops need res loaded to operate on */
6521
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
6522
            }
6523

    
6524
            switch (size) {
6525
            case 0: /* EOR */
6526
                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
6527
                break;
6528
            case 1: /* BSL bitwise select */
6529
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
6530
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
6531
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
6532
                break;
6533
            case 2: /* BIT, bitwise insert if true */
6534
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
6535
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
6536
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
6537
                break;
6538
            case 3: /* BIF, bitwise insert if false */
6539
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
6540
                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
6541
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
6542
                break;
6543
            }
6544
        }
6545
    }
6546

    
6547
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
6548
    if (!is_q) {
6549
        tcg_gen_movi_i64(tcg_res[1], 0);
6550
    }
6551
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
6552

    
6553
    tcg_temp_free_i64(tcg_op1);
6554
    tcg_temp_free_i64(tcg_op2);
6555
    tcg_temp_free_i64(tcg_res[0]);
6556
    tcg_temp_free_i64(tcg_res[1]);
6557
}
6558

    
6559
/* Pairwise op subgroup of C3.6.16. */
6560
static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
6561
{
6562
    unsupported_encoding(s, insn);
6563
}
6564

    
6565
/* Floating point op subgroup of C3.6.16. */
6566
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
6567
{
6568
    /* For floating point ops, the U, size[1] and opcode bits
6569
     * together indicate the operation. size[0] indicates single
6570
     * or double.
6571
     */
6572
    int fpopcode = extract32(insn, 11, 5)
6573
        | (extract32(insn, 23, 1) << 5)
6574
        | (extract32(insn, 29, 1) << 6);
6575
    int is_q = extract32(insn, 30, 1);
6576
    int size = extract32(insn, 22, 1);
6577
    int rm = extract32(insn, 16, 5);
6578
    int rn = extract32(insn, 5, 5);
6579
    int rd = extract32(insn, 0, 5);
6580

    
6581
    int datasize = is_q ? 128 : 64;
6582
    int esize = 32 << size;
6583
    int elements = datasize / esize;
6584

    
6585
    if (size == 1 && !is_q) {
6586
        unallocated_encoding(s);
6587
        return;
6588
    }
6589

    
6590
    switch (fpopcode) {
6591
    case 0x58: /* FMAXNMP */
6592
    case 0x5a: /* FADDP */
6593
    case 0x5e: /* FMAXP */
6594
    case 0x78: /* FMINNMP */
6595
    case 0x7e: /* FMINP */
6596
        /* pairwise ops */
6597
        unsupported_encoding(s, insn);
6598
        return;
6599
    case 0x1b: /* FMULX */
6600
    case 0x1c: /* FCMEQ */
6601
    case 0x1f: /* FRECPS */
6602
    case 0x3f: /* FRSQRTS */
6603
    case 0x5c: /* FCMGE */
6604
    case 0x5d: /* FACGE */
6605
    case 0x7c: /* FCMGT */
6606
    case 0x7d: /* FACGT */
6607
    case 0x19: /* FMLA */
6608
    case 0x39: /* FMLS */
6609
        unsupported_encoding(s, insn);
6610
        return;
6611
    case 0x18: /* FMAXNM */
6612
    case 0x1a: /* FADD */
6613
    case 0x1e: /* FMAX */
6614
    case 0x38: /* FMINNM */
6615
    case 0x3a: /* FSUB */
6616
    case 0x3e: /* FMIN */
6617
    case 0x5b: /* FMUL */
6618
    case 0x5f: /* FDIV */
6619
    case 0x7a: /* FABD */
6620
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
6621
        return;
6622
    default:
6623
        unallocated_encoding(s);
6624
        return;
6625
    }
6626
}
6627

    
6628
/* Integer op subgroup of C3.6.16. */
6629
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
6630
{
6631
    int is_q = extract32(insn, 30, 1);
6632
    int u = extract32(insn, 29, 1);
6633
    int size = extract32(insn, 22, 2);
6634
    int opcode = extract32(insn, 11, 5);
6635
    int rm = extract32(insn, 16, 5);
6636
    int rn = extract32(insn, 5, 5);
6637
    int rd = extract32(insn, 0, 5);
6638
    int pass;
6639

    
6640
    switch (opcode) {
6641
    case 0x13: /* MUL, PMUL */
6642
        if (u && size != 0) {
6643
            unallocated_encoding(s);
6644
            return;
6645
        }
6646
        /* fall through */
6647
    case 0x0: /* SHADD, UHADD */
6648
    case 0x2: /* SRHADD, URHADD */
6649
    case 0x4: /* SHSUB, UHSUB */
6650
    case 0xc: /* SMAX, UMAX */
6651
    case 0xd: /* SMIN, UMIN */
6652
    case 0xe: /* SABD, UABD */
6653
    case 0xf: /* SABA, UABA */
6654
    case 0x12: /* MLA, MLS */
6655
        if (size == 3) {
6656
            unallocated_encoding(s);
6657
            return;
6658
        }
6659
        unsupported_encoding(s, insn);
6660
        return;
6661
    case 0x16: /* SQDMULH, SQRDMULH */
6662
        if (size == 0 || size == 3) {
6663
            unallocated_encoding(s);
6664
            return;
6665
        }
6666
        unsupported_encoding(s, insn);
6667
        return;
6668
    default:
6669
        if (size == 3 && !is_q) {
6670
            unallocated_encoding(s);
6671
            return;
6672
        }
6673
        break;
6674
    }
6675

    
6676
    if (size == 3) {
6677
        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
6678
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6679
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6680
            TCGv_i64 tcg_res = tcg_temp_new_i64();
6681

    
6682
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
6683
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
6684

    
6685
            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
6686

    
6687
            write_vec_element(s, tcg_res, rd, pass, MO_64);
6688

    
6689
            tcg_temp_free_i64(tcg_res);
6690
            tcg_temp_free_i64(tcg_op1);
6691
            tcg_temp_free_i64(tcg_op2);
6692
        }
6693
    } else {
6694
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
6695
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6696
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6697
            TCGv_i32 tcg_res = tcg_temp_new_i32();
6698
            NeonGenTwoOpFn *genfn = NULL;
6699
            NeonGenTwoOpEnvFn *genenvfn = NULL;
6700

    
6701
            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
6702
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
6703

    
6704
            switch (opcode) {
6705
            case 0x1: /* SQADD, UQADD */
6706
            {
6707
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
6708
                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
6709
                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
6710
                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
6711
                };
6712
                genenvfn = fns[size][u];
6713
                break;
6714
            }
6715
            case 0x5: /* SQSUB, UQSUB */
6716
            {
6717
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
6718
                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
6719
                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
6720
                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
6721
                };
6722
                genenvfn = fns[size][u];
6723
                break;
6724
            }
6725
            case 0x6: /* CMGT, CMHI */
6726
            {
6727
                static NeonGenTwoOpFn * const fns[3][2] = {
6728
                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
6729
                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
6730
                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
6731
                };
6732
                genfn = fns[size][u];
6733
                break;
6734
            }
6735
            case 0x7: /* CMGE, CMHS */
6736
            {
6737
                static NeonGenTwoOpFn * const fns[3][2] = {
6738
                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
6739
                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
6740
                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
6741
                };
6742
                genfn = fns[size][u];
6743
                break;
6744
            }
6745
            case 0x8: /* SSHL, USHL */
6746
            {
6747
                static NeonGenTwoOpFn * const fns[3][2] = {
6748
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
6749
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
6750
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
6751
                };
6752
                genfn = fns[size][u];
6753
                break;
6754
            }
6755
            case 0x9: /* SQSHL, UQSHL */
6756
            {
6757
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
6758
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
6759
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
6760
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
6761
                };
6762
                genenvfn = fns[size][u];
6763
                break;
6764
            }
6765
            case 0xa: /* SRSHL, URSHL */
6766
            {
6767
                static NeonGenTwoOpFn * const fns[3][2] = {
6768
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
6769
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
6770
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
6771
                };
6772
                genfn = fns[size][u];
6773
                break;
6774
            }
6775
            case 0xb: /* SQRSHL, UQRSHL */
6776
            {
6777
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
6778
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
6779
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
6780
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
6781
                };
6782
                genenvfn = fns[size][u];
6783
                break;
6784
            }
6785
            case 0x10: /* ADD, SUB */
6786
            {
6787
                static NeonGenTwoOpFn * const fns[3][2] = {
6788
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
6789
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
6790
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
6791
                };
6792
                genfn = fns[size][u];
6793
                break;
6794
            }
6795
            case 0x11: /* CMTST, CMEQ */
6796
            {
6797
                static NeonGenTwoOpFn * const fns[3][2] = {
6798
                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
6799
                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
6800
                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
6801
                };
6802
                genfn = fns[size][u];
6803
                break;
6804
            }
6805
            default:
6806
                g_assert_not_reached();
6807
            }
6808

    
6809
            if (genenvfn) {
6810
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
6811
            } else {
6812
                genfn(tcg_res, tcg_op1, tcg_op2);
6813
            }
6814

    
6815
            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
6816

    
6817
            tcg_temp_free_i32(tcg_res);
6818
            tcg_temp_free_i32(tcg_op1);
6819
            tcg_temp_free_i32(tcg_op2);
6820
        }
6821
    }
6822

    
6823
    if (!is_q) {
6824
        clear_vec_high(s, rd);
6825
    }
6826
}
6827

    
6828
/* C3.6.16 AdvSIMD three same
6829
 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
6830
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
6831
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
6832
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
6833
 */
6834
static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
6835
{
6836
    int opcode = extract32(insn, 11, 5);
6837

    
6838
    switch (opcode) {
6839
    case 0x3: /* logic ops */
6840
        disas_simd_3same_logic(s, insn);
6841
        break;
6842
    case 0x17: /* ADDP */
6843
    case 0x14: /* SMAXP, UMAXP */
6844
    case 0x15: /* SMINP, UMINP */
6845
        /* Pairwise operations */
6846
        disas_simd_3same_pair(s, insn);
6847
        break;
6848
    case 0x18 ... 0x31:
6849
        /* floating point ops, sz[1] and U are part of opcode */
6850
        disas_simd_3same_float(s, insn);
6851
        break;
6852
    default:
6853
        disas_simd_3same_int(s, insn);
6854
        break;
6855
    }
6856
}
6857

    
6858
/* C3.6.17 AdvSIMD two reg misc
6859
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6860
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6861
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6862
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6863
 */
6864
static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
6865
{
6866
    unsupported_encoding(s, insn);
6867
}
6868

    
6869
/* C3.6.18 AdvSIMD vector x indexed element
6870
 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
6871
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
6872
 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
6873
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
6874
 */
6875
static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
6876
{
6877
    unsupported_encoding(s, insn);
6878
}
6879

    
6880
/* C3.6.19 Crypto AES
6881
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
6882
 * +-----------------+------+-----------+--------+-----+------+------+
6883
 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6884
 * +-----------------+------+-----------+--------+-----+------+------+
6885
 */
6886
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
6887
{
6888
    unsupported_encoding(s, insn);
6889
}
6890

    
6891
/* C3.6.20 Crypto three-reg SHA
6892
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
6893
 * +-----------------+------+---+------+---+--------+-----+------+------+
6894
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
6895
 * +-----------------+------+---+------+---+--------+-----+------+------+
6896
 */
6897
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
6898
{
6899
    unsupported_encoding(s, insn);
6900
}
6901

    
6902
/* C3.6.21 Crypto two-reg SHA
6903
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
6904
 * +-----------------+------+-----------+--------+-----+------+------+
6905
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6906
 * +-----------------+------+-----------+--------+-----+------+------+
6907
 */
6908
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
6909
{
6910
    unsupported_encoding(s, insn);
6911
}
6912

    
6913
/* C3.6 Data processing - SIMD, inc Crypto
6914
 *
6915
 * As the decode gets a little complex we are using a table based
6916
 * approach for this part of the decode.
6917
 */
6918
static const AArch64DecodeTable data_proc_simd[] = {
6919
    /* pattern  ,  mask     ,  fn                        */
6920
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
6921
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
6922
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
6923
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
6924
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
6925
    { 0x0f000000, 0x9f000400, disas_simd_indexed_vector },
6926
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
6927
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
6928
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
6929
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
6930
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
6931
    { 0x2e000000, 0xbf208400, disas_simd_ext },
6932
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
6933
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
6934
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
6935
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
6936
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
6937
    { 0x5f000000, 0xdf000400, disas_simd_scalar_indexed },
6938
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
6939
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
6940
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
6941
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
6942
    { 0x00000000, 0x00000000, NULL }
6943
};
6944

    
6945
/* Dispatch a SIMD (incl. crypto) instruction via the decode table. */
static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
/* C3.6 Data processing - SIMD and floating point */
6960
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
6961
{
6962
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
6963
        disas_data_proc_fp(s, insn);
6964
    } else {
6965
        /* SIMD, including crypto */
6966
        disas_data_proc_simd(s, insn);
6967
    }
6968
}
6969

    
6970
/* C3.1 A64 instruction index by encoding */
6971
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
6972
{
6973
    uint32_t insn;
6974

    
6975
    insn = arm_ldl_code(env, s->pc, s->bswap_code);
6976
    s->insn = insn;
6977
    s->pc += 4;
6978

    
6979
    switch (extract32(insn, 25, 4)) {
6980
    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
6981
        unallocated_encoding(s);
6982
        break;
6983
    case 0x8: case 0x9: /* Data processing - immediate */
6984
        disas_data_proc_imm(s, insn);
6985
        break;
6986
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
6987
        disas_b_exc_sys(s, insn);
6988
        break;
6989
    case 0x4:
6990
    case 0x6:
6991
    case 0xc:
6992
    case 0xe:      /* Loads and stores */
6993
        disas_ldst(s, insn);
6994
        break;
6995
    case 0x5:
6996
    case 0xd:      /* Data processing - register */
6997
        disas_data_proc_reg(s, insn);
6998
        break;
6999
    case 0x7:
7000
    case 0xf:      /* Data processing - SIMD and floating point */
7001
        disas_data_proc_simd_fp(s, insn);
7002
        break;
7003
    default:
7004
        assert(FALSE); /* all 15 cases should be handled above */
7005
        break;
7006
    }
7007

    
7008
    /* if we allocated any temporaries, free them here */
7009
    free_tmp_a64(s);
7010
}
7011

    
7012
void gen_intermediate_code_internal_a64(ARMCPU *cpu,
7013
                                        TranslationBlock *tb,
7014
                                        bool search_pc)
7015
{
7016
    CPUState *cs = CPU(cpu);
7017
    CPUARMState *env = &cpu->env;
7018
    DisasContext dc1, *dc = &dc1;
7019
    CPUBreakpoint *bp;
7020
    uint16_t *gen_opc_end;
7021
    int j, lj;
7022
    target_ulong pc_start;
7023
    target_ulong next_page_start;
7024
    int num_insns;
7025
    int max_insns;
7026

    
7027
    pc_start = tb->pc;
7028

    
7029
    dc->tb = tb;
7030

    
7031
    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
7032

    
7033
    dc->is_jmp = DISAS_NEXT;
7034
    dc->pc = pc_start;
7035
    dc->singlestep_enabled = cs->singlestep_enabled;
7036
    dc->condjmp = 0;
7037

    
7038
    dc->aarch64 = 1;
7039
    dc->thumb = 0;
7040
    dc->bswap_code = 0;
7041
    dc->condexec_mask = 0;
7042
    dc->condexec_cond = 0;
7043
#if !defined(CONFIG_USER_ONLY)
7044
    dc->user = 0;
7045
#endif
7046
    dc->vfp_enabled = 0;
7047
    dc->vec_len = 0;
7048
    dc->vec_stride = 0;
7049
    dc->cp_regs = cpu->cp_regs;
7050
    dc->current_pl = arm_current_pl(env);
7051

    
7052
    init_tmp_a64_array(dc);
7053

    
7054
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
7055
    lj = -1;
7056
    num_insns = 0;
7057
    max_insns = tb->cflags & CF_COUNT_MASK;
7058
    if (max_insns == 0) {
7059
        max_insns = CF_COUNT_MASK;
7060
    }
7061

    
7062
    gen_tb_start();
7063

    
7064
    tcg_clear_temp_count();
7065

    
7066
    do {
7067
        if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
7068
            QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
7069
                if (bp->pc == dc->pc) {
7070
                    gen_exception_insn(dc, 0, EXCP_DEBUG);
7071
                    /* Advance PC so that clearing the breakpoint will
7072
                       invalidate this TB.  */
7073
                    dc->pc += 2;
7074
                    goto done_generating;
7075
                }
7076
            }
7077
        }
7078

    
7079
        if (search_pc) {
7080
            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
7081
            if (lj < j) {
7082
                lj++;
7083
                while (lj < j) {
7084
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
7085
                }
7086
            }
7087
            tcg_ctx.gen_opc_pc[lj] = dc->pc;
7088
            tcg_ctx.gen_opc_instr_start[lj] = 1;
7089
            tcg_ctx.gen_opc_icount[lj] = num_insns;
7090
        }
7091

    
7092
        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
7093
            gen_io_start();
7094
        }
7095

    
7096
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
7097
            tcg_gen_debug_insn_start(dc->pc);
7098
        }
7099

    
7100
        disas_a64_insn(env, dc);
7101

    
7102
        if (tcg_check_temp_count()) {
7103
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
7104
                    dc->pc);
7105
        }
7106

    
7107
        /* Translation stops when a conditional branch is encountered.
7108
         * Otherwise the subsequent code could get translated several times.
7109
         * Also stop translation when a page boundary is reached.  This
7110
         * ensures prefetch aborts occur at the right place.
7111
         */
7112
        num_insns++;
7113
    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
7114
             !cs->singlestep_enabled &&
7115
             !singlestep &&
7116
             dc->pc < next_page_start &&
7117
             num_insns < max_insns);
7118

    
7119
    if (tb->cflags & CF_LAST_IO) {
7120
        gen_io_end();
7121
    }
7122

    
7123
    if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
7124
        /* Note that this means single stepping WFI doesn't halt the CPU.
7125
         * For conditional branch insns this is harmless unreachable code as
7126
         * gen_goto_tb() has already handled emitting the debug exception
7127
         * (and thus a tb-jump is not possible when singlestepping).
7128
         */
7129
        assert(dc->is_jmp != DISAS_TB_JUMP);
7130
        if (dc->is_jmp != DISAS_JUMP) {
7131
            gen_a64_set_pc_im(dc->pc);
7132
        }
7133
        gen_exception(EXCP_DEBUG);
7134
    } else {
7135
        switch (dc->is_jmp) {
7136
        case DISAS_NEXT:
7137
            gen_goto_tb(dc, 1, dc->pc);
7138
            break;
7139
        default:
7140
        case DISAS_UPDATE:
7141
            gen_a64_set_pc_im(dc->pc);
7142
            /* fall through */
7143
        case DISAS_JUMP:
7144
            /* indicate that the hash table must be used to find the next TB */
7145
            tcg_gen_exit_tb(0);
7146
            break;
7147
        case DISAS_TB_JUMP:
7148
        case DISAS_EXC:
7149
        case DISAS_SWI:
7150
            break;
7151
        case DISAS_WFI:
7152
            /* This is a special case because we don't want to just halt the CPU
7153
             * if trying to debug across a WFI.
7154
             */
7155
            gen_helper_wfi(cpu_env);
7156
            break;
7157
        }
7158
    }
7159

    
7160
done_generating:
7161
    gen_tb_end(tb, num_insns);
7162
    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
7163

    
7164
#ifdef DEBUG_DISAS
7165
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
7166
        qemu_log("----------------\n");
7167
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
7168
        log_target_disas(env, pc_start, dc->pc - pc_start,
7169
                         dc->thumb | (dc->bswap_code << 1));
7170
        qemu_log("\n");
7171
    }
7172
#endif
7173
    if (search_pc) {
7174
        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
7175
        lj++;
7176
        while (lj <= j) {
7177
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
7178
        }
7179
    } else {
7180
        tb->size = dc->pc - pc_start;
7181
        tb->icount = num_insns;
7182
    }
7183
}