target-arm/translate-a64.c @ d9ea7d29

/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "translate.h"
#include "qemu/host-utils.h"

#include "exec/gen-icount.h"

#include "helper.h"
#define GEN_HELPER 1
#include "helper.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_addr;
static TCGv_i64 cpu_exclusive_val;
static TCGv_i64 cpu_exclusive_high;
#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
    cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif
}

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }
    cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-');
    cpu_fprintf(f, "\n");

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

static int get_mem_index(DisasContext *s)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    return s->user;
#endif
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

static void gen_exception(int excp)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, excp);
    gen_helper_exception(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
}

static void gen_exception_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp);
    s->is_jmp = DISAS_EXC;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep or deterministic io */
    if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
        return false;
    }

    /* Only link tbs from inside the same guest page */
    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((tcg_target_long)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->singlestep_enabled) {
            gen_exception(EXCP_DEBUG);
        }
        tcg_gen_exit_tb(0);
        s->is_jmp = DISAS_JUMP;
    }
}
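
/* The value passed to tcg_gen_exit_tb() above is the address of this TB
 * with the jump slot index n in its low bits; cpu_exec() uses a non-zero
 * return value to patch that slot so the two TBs chain directly on
 * subsequent executions, bypassing the TB lookup.
 */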

static void unallocated_encoding(DisasContext *s)
{
    gen_exception_insn(s, 4, EXCP_UDEF);
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    return offs;
}
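
/* Worked example of the big-endian case: element 1 of a vector of 32 bit
 * elements (size MO_32) occupies bytes 4..7 of Qn. The fully big-endian
 * offset is 16 - (2 * 4) = 8, and XORing with 8 gives 0: those bytes are
 * the most significant half of the big-endian-stored low 64 bit word
 * vfp.regs[2n], so they sit at the start of it in host memory.
 */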

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(int regno)
{
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
    tcg_temp_free_i64(tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    TCGv_i64 flag = tcg_temp_new_i64();

    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
    tcg_gen_trunc_i64_i32(cpu_ZF, flag);
    tcg_gen_shri_i64(flag, result, 32);
    tcg_gen_trunc_i64_i32(cpu_NF, flag);
    tcg_temp_free_i64(flag);
}
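
/* The flag representation matches the 32 bit ARM translator: Z is set
 * iff cpu_ZF == 0, N is bit 31 of cpu_NF, C is the value of cpu_CF and
 * V is bit 31 of cpu_VF. So for a 64 bit result cpu_ZF gets (result != 0)
 * and cpu_NF gets the top 32 bits, whose bit 31 is the 64 bit sign bit.
 */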

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_trunc_i64_i32(cpu_ZF, result);
        tcg_gen_trunc_i64_i32(cpu_NF, result);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
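
/* The overflow computation above uses the standard identity for addition,
 * V = (result ^ t0) & ~(t0 ^ t1): overflow is only possible when both
 * operands have the same sign (t0 ^ t1 clear in the sign bit) and the
 * result's sign differs from them. The 64 bit path then shifts right by
 * 32 so the flag lands in bit 31 of cpu_VF, matching the flag layout.
 */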

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
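
/* Two conventions worth noting for subtraction: ARM sets C when there is
 * no borrow, i.e. when t0 >= t1 unsigned, hence TCG_COND_GEU above; and
 * unlike addition the overflow test is V = (result ^ t0) & (t0 ^ t1),
 * since overflow on subtract requires the operands to differ in sign.
 */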

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_shri_i64(vf_64, vf_64, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed, bool extend, int memidx)
{
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }
}
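
/* Example of the extend case: a signed byte load (size 0) with extend set
 * sign-extends 0x80 to 0xffffffffffffff80, then the final ext32u leaves
 * 0x00000000ffffff80, i.e. the value sign-extended within 32 bits, as
 * needed when the destination is a W register.
 */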

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
    } else {
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = MO_TE + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        TCGv_i64 tcg_hiaddr;
        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
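
/* For example, option 0b010 (UXTW) with shift 2 computes
 * tcg_out = (uint64_t)(uint32_t)tcg_in << 2, the index value used by
 * addressing modes such as LDR Xt, [Xn, Wm, UXTW #2].
 */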

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
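
/* Illustrative use (the handler and table names here are invented, not
 * part of this file): a decoder builds a pattern/mask table and dispatches
 * on the first match, falling back to an unallocated encoding:
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e000000, 0x9f000000, disas_example_handler },
 *         { 0x00000000, 0x00000000, NULL }   // all-zero mask terminates
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */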

/*
 * the instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a)
 */

/* C3.2.6 Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1 << 31)) {
        /* C5.6.26 BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
    gen_goto_tb(s, 0, addr);
}
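
/* Note the PC arithmetic above: s->pc has already been advanced past this
 * instruction, so the branch target is insn_address + imm26 * 4. For
 * example, an encoding with imm26 == 2 gives addr = s->pc + 8 - 4, i.e.
 * insn_address + 8 (B .+8).
 */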

/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        int label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* C5.6.68 HINT */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 1: /* YIELD */
    case 2: /* WFE */
    case 3: /* WFI */
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
    case 6: /* ISB */
        /* We don't emulate caches so barriers are no-ops */
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsupported_encoding(s, insn);
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_trunc_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}

/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_pl, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr);
        tcg_temp_free_ptr(tmpptr);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
        return;
    default:
        break;
    }

    if (use_icount && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if (use_icount && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}

/* C3.2.4 System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* C5.6.68 HINT */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* C5.6.130 MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}

/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);

    switch (opc) {
    case 0:
        /* SVC, HVC, SMC; since we don't support the Virtualization
         * or TrustZone extensions these all UNDEF except SVC.
         */
        if (op2_ll != 1) {
            unallocated_encoding(s);
            break;
        }
        gen_exception_insn(s, 0, EXCP_SWI);
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 0, EXCP_BKPT);
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT */
        unsupported_encoding(s, insn);
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.2.7 Unconditional branch (register)
 *  31           25 24   21 20   16 15   10 9    5 4     0
 * +---------------+-------+-------+-------+------+-------+
 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
 * +---------------+-------+-------+-------+------+-------+
 */
static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
{
    unsigned int opc, op2, op3, rn, op4;

    opc = extract32(insn, 21, 4);
    op2 = extract32(insn, 16, 5);
    op3 = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    op4 = extract32(insn, 0, 5);

    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* BR */
    case 2: /* RET */
        break;
    case 1: /* BLR */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
        break;
    case 4: /* ERET */
    case 5: /* DRPS */
        if (rn != 0x1f) {
            unallocated_encoding(s);
        } else {
            unsupported_encoding(s, insn);
        }
        return;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
    s->is_jmp = DISAS_JUMP;
}

/* C3.2 Branches, exception generating and system instructions */
static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 25, 7)) {
    case 0x0a: case 0x0b:
    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
        disas_uncond_b_imm(s, insn);
        break;
    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
        disas_comp_b_imm(s, insn);
        break;
    case 0x1b: case 0x5b: /* Test & branch (immediate) */
        disas_test_b_imm(s, insn);
        break;
    case 0x2a: /* Conditional branch (immediate) */
        disas_cond_b_imm(s, insn);
        break;
    case 0x6a: /* Exception generation / System */
        if (insn & (1 << 24)) {
            disas_system(s, insn);
        } else {
            disas_exc(s, insn);
        }
        break;
    case 0x6b: /* Unconditional branch (register) */
        disas_uncond_b_reg(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * In system emulation mode only one CPU will be running at once, so
 * this sequence is effectively atomic.  In user emulation mode we
 * throw an exception and handle the atomic operation elsewhere.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        g_assert(size >= 2);
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}

#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_insn(s, 4, EXCP_STREX);
}
#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    int fail_label = gen_new_label();
    int done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
#endif
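
/* One limitation of this value-comparison scheme: if memory is modified
 * and then restored to its original value between the load-exclusive and
 * the store-exclusive (the classic ABA pattern), the store succeeds where
 * a real exclusive monitor could fail. As the comment above notes, this
 * approximation is fine for typical guest code sequences.
 */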

/* C3.3.6 Load/store exclusive
 *
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 *
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 *   L: 0 -> store, 1 -> load
 *  o2: 0 -> exclusive, 1 -> not
 *  o1: 0 -> single register, 1 -> register pair
 *  o0: 1 -> load-acquire/store-release, 0 -> not
 *
 *  o0 == 0 AND o2 == 1 is un-allocated
 *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int is_lasr = extract32(insn, 15, 1);
    int rs = extract32(insn, 16, 5);
    int is_pair = extract32(insn, 21, 1);
    int is_store = !extract32(insn, 22, 1);
    int is_excl = !extract32(insn, 23, 1);
    int size = extract32(insn, 30, 2);
    TCGv_i64 tcg_addr;

    if ((!is_excl && !is_lasr) ||
        (is_pair && size < 2)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Note that since TCG is single threaded load-acquire/store-release
1618
     * semantics require no extra if (is_lasr) { ... } handling.
1619
     */
1620

    
1621
    if (is_excl) {
1622
        if (!is_store) {
1623
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1624
        } else {
1625
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1626
        }
1627
    } else {
1628
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
1629
        if (is_store) {
1630
            do_gpr_st(s, tcg_rt, tcg_addr, size);
1631
        } else {
1632
            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1633
        }
1634
        if (is_pair) {
1635
            TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1636
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1637
            if (is_store) {
1638
                do_gpr_st(s, tcg_rt2, tcg_addr, size);
1639
            } else {
1640
                do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1641
            }
1642
        }
1643
    }
1644
}
1645

    
1646
/*
1647
 * C3.3.5 Load register (literal)
1648
 *
1649
 *  31 30 29   27  26 25 24 23                5 4     0
1650
 * +-----+-------+---+-----+-------------------+-------+
1651
 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1652
 * +-----+-------+---+-----+-------------------+-------+
1653
 *
1654
 * V: 1 -> vector (simd/fp)
1655
 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1656
 *                   10-> 32 bit signed, 11 -> prefetch
1657
 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1658
 */
1659
static void disas_ld_lit(DisasContext *s, uint32_t insn)
1660
{
1661
    int rt = extract32(insn, 0, 5);
1662
    int64_t imm = sextract32(insn, 5, 19) << 2;
1663
    bool is_vector = extract32(insn, 26, 1);
1664
    int opc = extract32(insn, 30, 2);
1665
    bool is_signed = false;
1666
    int size = 2;
1667
    TCGv_i64 tcg_rt, tcg_addr;
1668

    
1669
    if (is_vector) {
1670
        if (opc == 3) {
1671
            unallocated_encoding(s);
1672
            return;
1673
        }
1674
        size = 2 + opc;
1675
    } else {
1676
        if (opc == 3) {
1677
            /* PRFM (literal) : prefetch */
1678
            return;
1679
        }
1680
        size = 2 + extract32(opc, 0, 1);
1681
        is_signed = extract32(opc, 1, 1);
1682
    }
1683

    
1684
    tcg_rt = cpu_reg(s, rt);
1685

    
1686
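    /* s->pc has already been advanced past this insn, so wind back by 4
     * to get the address the PC-relative literal offset applies to.
     */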
    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
    if (is_vector) {
        do_fp_ld(s, rt, tcg_addr, size);
    } else {
        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
    }
    tcg_temp_free_i64(tcg_addr);
}

/*
 * C5.6.80 LDNP (Load Pair - non-temporal hint)
 * C5.6.81 LDP (Load Pair - non vector)
 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
 * C5.6.176 STNP (Store Pair - non-temporal hint)
 * C5.6.177 STP (Store Pair - non vector)
 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * C6.3.165 LDP (Load Pair of SIMD&FP)
 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
 * C6.3.284 STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4 or 8 depending on size)
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int64_t offset = sextract32(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_load) {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        }
    }
    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
        if (is_load) {
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt2, tcg_addr, size);
        }
    }

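    /* tcg_addr is now the address of the second element: undo that
     * extra (1 << size) and, for post-index, apply the offset, so that
     * the value written back is always base + offset.
     */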
    if (wback) {
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}

/*
 * C3.3.8 Load/store (immediate post-indexed)
 * C3.3.9 Load/store (immediate pre-indexed)
 * C3.3.12 Load/store (unscaled immediate)
 *
 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 *
 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
 * V = 0 -> non-vector
 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_unpriv = (idx == 2);
    bool is_vector = extract32(insn, 26, 1);
    bool post_index;
    bool writeback;

    TCGv_i64 tcg_addr;

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4 || is_unpriv) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            if (is_unpriv) {
                unallocated_encoding(s);
                return;
            }
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = opc & (1<<1);
        is_extended = (size < 3) && (opc & 1);
    }

    switch (idx) {
    case 0:
    case 2:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!post_index) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
    }

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        int memidx = is_unpriv ? 1 : get_mem_index(s);

        if (is_store) {
            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
        } else {
            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
                             is_signed, is_extended, memidx);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, tcg_addr);
    }
}

/*
 * C3.3.10 Load/store (register offset)
 *
 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * V: 1 -> vector/simd
 * opt: extend encoding (see DecodeRegExtend)
 * S: if S=1 then scale (essentially index by sizeof(size))
 * Rt: register to transfer into/out of
 * Rn: address register or SP for base
 * Rm: offset register or ZR for offset
 */
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opc = extract32(insn, 22, 2);
    int opt = extract32(insn, 13, 3);
    int size = extract32(insn, 30, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);

    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_addr;

    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

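    /* The offset register is extended and optionally scaled by the
     * transfer size, e.g. LDR X0, [X1, W2, SXTW #3] sign-extends W2
     * and shifts it left by 3 before adding it to the base.
     */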
    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}

/*
 * C3.3.13 Load/store (unsigned immediate)
 *
 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
 * +----+-------+---+-----+-----+------------+-------+------+
 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
 * +----+-------+---+-----+-----+------------+-------+------+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * Rn: base address register (inc SP)
 * Rt: target register
 */
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    bool is_vector = extract32(insn, 26, 1);
    int size = extract32(insn, 30, 2);
    int opc = extract32(insn, 22, 2);
    unsigned int offset;

    TCGv_i64 tcg_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
    offset = imm12 << size;
    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}

/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 2)) {
    case 0:
        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
            disas_ldst_reg_roffset(s, insn);
        } else {
            /* Load/store register (unscaled immediate)
             * Load/store immediate pre/post-indexed
             * Load/store register unprivileged
             */
            disas_ldst_reg_imm9(s, insn);
        }
        break;
    case 1:
        disas_ldst_reg_unsigned_imm(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.3.1 AdvSIMD load/store multiple structures
 *
 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+-------------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+-------------+--------+------+------+------+
 *
 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
 *
 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;
    int elements = (is_q ? 128 : 64) / (8 << size);
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
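    /* opcode selects between LD1/ST1 (which can name 1-4 consecutive
     * registers) and the multi-element LD2-LD4/ST2-ST4 forms:
     * 0x0 -> LD4/ST4, 0x4 -> LD3/ST3, 0x8 -> LD2/ST2,
     * 0x2/0x6/0xa/0x7 -> LD1/ST1 with 4, 3, 2 or 1 registers.
     */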
    switch (opcode) {
    case 0x0:
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}

/* C3.3.3 AdvSIMD load/store single structure
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 * index = encoded in Q:S:size dependent on size
 *
 * lane_size = encoded in R, opc
 * transfer width = encoded in opc, S, size
 */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
    bool replicate = false;
    int index = is_q << 3 | S << 2 | size;
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

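    /* The lane index is encoded in Q:S:size, but how many of those bits
     * are used depends on the element width: an 8 bit lane uses all four
     * bits, while a 64 bit lane is selected by Q alone (handled by the
     * shifts of 'index' below).
     */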
    switch (scale) {
    case 3:
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        break;
    case 1:
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    ebytes = 1 << scale;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                get_mem_index(s), MO_TE + scale);
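            /* Multiplying the loaded element by one of these constants
             * replicates it into every lane of the 64 bit value, e.g. a
             * byte times 0x0101010101010101; 64 bit elements need no
             * replication (mulconst == 0).
             */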
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                mulconst = 0;
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
            }
        }
        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}

/* C3.3 Loads and stores */
static void disas_ldst(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 6)) {
    case 0x08: /* Load/store exclusive */
        disas_ldst_excl(s, insn);
        break;
    case 0x18: case 0x1c: /* Load register (literal) */
        disas_ld_lit(s, insn);
        break;
    case 0x28: case 0x29:
    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
        disas_ldst_pair(s, insn);
        break;
    case 0x38: case 0x39:
    case 0x3c: case 0x3d: /* Load/store register (all forms) */
        disas_ldst_reg(s, insn);
        break;
    case 0x0c: /* AdvSIMD load/store multiple structures */
        disas_ldst_multiple_struct(s, insn);
        break;
    case 0x0d: /* AdvSIMD load/store single structure */
        disas_ldst_single_struct(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.4.6 PC-rel. addressing
 *   31  30   29 28       24 23                5 4    0
 * +----+-------+-----------+-------------------+------+
 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
 * +----+-------+-----------+-------------------+------+
 */
static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
{
    unsigned int page, rd;
    uint64_t base;
    int64_t offset;

    page = extract32(insn, 31, 1);
    /* SignExtend(immhi:immlo) -> offset */
    offset = ((int64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
    rd = extract32(insn, 0, 5);
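    /* s->pc points at the next insn; ADR/ADRP offsets are relative to
     * the address of this instruction, hence the -4.
     */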
    base = s->pc - 4;

    if (page) {
        /* ADRP (page based) */
        base &= ~0xfff;
        offset <<= 12;
    }

    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
}

/*
 * C3.4.1 Add/subtract (immediate)
 *
 *  31 30 29 28       24 23 22 21         10 9   5 4   0
 * +--+--+--+-----------+-----+-------------+-----+-----+
 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
 * +--+--+--+-----------+-----+-------------+-----+-----+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
 */
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t imm = extract32(insn, 10, 12);
    int shift = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool is_64bit = extract32(insn, 31, 1);

    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
    TCGv_i64 tcg_result;

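    /* shift == 1 moves the 12-bit immediate up by 12 bits, so e.g.
     * ADD Xd, Xn, #3, LSL #12 adds 0x3000.
     */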
    switch (shift) {
    case 0x0:
        break;
    case 0x1:
        imm <<= 12;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
        } else {
            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
        }
    } else {
        TCGv_i64 tcg_imm = tcg_const_i64(imm);
        if (sub_op) {
            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        } else {
            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        }
        tcg_temp_free_i64(tcg_imm);
    }

    if (is_64bit) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    assert(e != 0);
    while (e < 64) {
        mask |= mask << e;
        e *= 2;
    }
    return mask;
}

/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    return ~0ULL >> (64 - length);
}

/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
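    /* For example immn = 0, imms = 110000, immr = 0 gives
     * ~imms & 0x3f = 001111, so len = 3 and e = 8: a run of one set bit
     * in each byte, i.e. the mask 0x0101010101010101.
     */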
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0x11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    if (r) {
        mask = (mask >> r) | (mask << (e - r));
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}

/* C3.4.4 Logical (immediate)
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 */
static void disas_logic_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opc, is_n, immr, imms, rn, rd;
    TCGv_i64 tcg_rd, tcg_rn;
    uint64_t wmask;
    bool is_and = false;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    is_n = extract32(insn, 22, 1);
    immr = extract32(insn, 16, 6);
    imms = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (!sf && is_n) {
        unallocated_encoding(s);
        return;
    }

    if (opc == 0x3) { /* ANDS */
        tcg_rd = cpu_reg(s, rd);
    } else {
        tcg_rd = cpu_reg_sp(s, rd);
    }
    tcg_rn = cpu_reg(s, rn);

    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
        /* some immediate field values are reserved */
        unallocated_encoding(s);
        return;
    }

    if (!sf) {
        wmask &= 0xffffffff;
    }

    switch (opc) {
    case 0x3: /* ANDS */
    case 0x0: /* AND */
        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
        is_and = true;
        break;
    case 0x1: /* ORR */
        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
        break;
    case 0x2: /* EOR */
        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
        break;
    default:
        assert(FALSE); /* must handle all above */
        break;
    }

    if (!sf && !is_and) {
        /* zero extend final result; we know we can skip this for AND
         * since the immediate had the high 32 bits clear.
         */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    if (opc == 3) { /* ANDS */
        gen_logic_CC(sf, tcg_rd);
    }
}

/*
 * C3.4.5 Move wide (immediate)
 *
 *  31 30 29 28         23 22 21 20             5 4    0
 * +--+-----+-------------+-----+----------------+------+
 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
 * +--+-----+-------------+-----+----------------+------+
 *
 * sf: 0 -> 32 bit, 1 -> 64 bit
 * opc: 00 -> N, 10 -> Z, 11 -> K
 * hw: shift/16 (0,16, and sf only 32, 48)
 */
static void disas_movw_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    uint64_t imm = extract32(insn, 5, 16);
    int sf = extract32(insn, 31, 1);
    int opc = extract32(insn, 29, 2);
    int pos = extract32(insn, 21, 2) << 4;
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_imm;

    if (!sf && (pos >= 32)) {
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* MOVN */
    case 2: /* MOVZ */
        imm <<= pos;
        if (opc == 0) {
            imm = ~imm;
        }
        if (!sf) {
            imm &= 0xffffffffu;
        }
        tcg_gen_movi_i64(tcg_rd, imm);
        break;
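    /* MOVK deposits the 16 bit immediate into the existing register
     * value at bit position hw * 16, leaving the other bits unchanged.
     */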
    case 3: /* MOVK */
        tcg_imm = tcg_const_i64(imm);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
        tcg_temp_free_i64(tcg_imm);
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.4.2 Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);
    si = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);
    tcg_tmp = read_cpu_reg(s, rn, sf);

    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */

    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
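    /* e.g. UBFX Xd, Xn, #8, #4 has ri = 8, si = 11: take the si >= ri
     * path, shift right by 8 and deposit a 4 bit field at position 0.
     */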
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r:32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

/* C3.4.3 Extract
 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
 * +----+------+-------------+---+----+------+--------+------+------+
 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
 * +----+------+-------------+---+----+------+--------+------+------+
 */
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

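        /* EXTR: Rd = (Rn:Rm) >> imm, truncated to the bottom 'bitsize'
         * bits, so the low part of the result comes from Rm and the
         * high part from Rn.
         */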
        if (imm) {
            /* OPTME: we can special case rm==rn as a rotate */
            tcg_rm = read_cpu_reg(s, rm, sf);
            tcg_rn = read_cpu_reg(s, rn, sf);
            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        }
    }
}

/* C3.4 Data processing - immediate */
static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 23, 6)) {
    case 0x20: case 0x21: /* PC-rel. addressing */
        disas_pc_rel_adr(s, insn);
        break;
    case 0x22: case 0x23: /* Add/subtract (immediate) */
        disas_add_sub_imm(s, insn);
        break;
    case 0x24: /* Logical (immediate) */
        disas_logic_imm(s, insn);
        break;
    case 0x25: /* Move wide (immediate) */
        disas_movw_imm(s, insn);
        break;
    case 0x26: /* Bitfield */
        disas_bitfield(s, insn);
        break;
    case 0x27: /* Extract */
        disas_extract(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
 * mandated semantics for out of range shifts.
 */
static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
{
    switch (shift_type) {
    case A64_SHIFT_TYPE_LSL:
        tcg_gen_shl_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_LSR:
        tcg_gen_shr_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_ASR:
        if (!sf) {
            tcg_gen_ext32s_i64(dst, src);
        }
        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
        break;
    case A64_SHIFT_TYPE_ROR:
        if (sf) {
            tcg_gen_rotr_i64(dst, src, shift_amount);
        } else {
2904
            TCGv_i32 t0, t1;
2905
            t0 = tcg_temp_new_i32();
2906
            t1 = tcg_temp_new_i32();
2907
            tcg_gen_trunc_i64_i32(t0, src);
2908
            tcg_gen_trunc_i64_i32(t1, shift_amount);
2909
            tcg_gen_rotr_i32(t0, t0, t1);
2910
            tcg_gen_extu_i32_i64(dst, t0);
2911
            tcg_temp_free_i32(t0);
2912
            tcg_temp_free_i32(t1);
2913
        }
2914
        break;
2915
    default:
2916
        assert(FALSE); /* all shift types should be handled */
2917
        break;
2918
    }
2919

    
2920
    if (!sf) { /* zero extend final result */
2921
        tcg_gen_ext32u_i64(dst, dst);
2922
    }
2923
}
2924

    
2925
/* Shift a TCGv src by immediate, put result in dst.
2926
 * The shift amount must be in range (this should always be true as the
2927
 * relevant instructions will UNDEF on bad shift immediates).
2928
 */
2929
static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
2930
                          enum a64_shift_type shift_type, unsigned int shift_i)
2931
{
2932
    assert(shift_i < (sf ? 64 : 32));
2933

    
2934
    if (shift_i == 0) {
2935
        tcg_gen_mov_i64(dst, src);
2936
    } else {
2937
        TCGv_i64 shift_const;
2938

    
2939
        shift_const = tcg_const_i64(shift_i);
2940
        shift_reg(dst, src, sf, shift_type, shift_const);
2941
        tcg_temp_free_i64(shift_const);
2942
    }
2943
}
2944

    
2945
/* C3.5.10 Logical (shifted register)
2946
 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
2947
 * +----+-----+-----------+-------+---+------+--------+------+------+
2948
 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
2949
 * +----+-----+-----------+-------+---+------+--------+------+------+
2950
 */
2951
static void disas_logic_reg(DisasContext *s, uint32_t insn)
2952
{
2953
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
2954
    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
2955

    
2956
    sf = extract32(insn, 31, 1);
2957
    opc = extract32(insn, 29, 2);
2958
    shift_type = extract32(insn, 22, 2);
2959
    invert = extract32(insn, 21, 1);
2960
    rm = extract32(insn, 16, 5);
2961
    shift_amount = extract32(insn, 10, 6);
2962
    rn = extract32(insn, 5, 5);
2963
    rd = extract32(insn, 0, 5);
2964

    
2965
    if (!sf && (shift_amount & (1 << 5))) {
2966
        unallocated_encoding(s);
2967
        return;
2968
    }
2969

    
2970
    tcg_rd = cpu_reg(s, rd);
2971

    
2972
    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
2973
        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
2974
         * register-register MOV and MVN, so it is worth special casing.
2975
         */
2976
        tcg_rm = cpu_reg(s, rm);
2977
        if (invert) {
2978
            tcg_gen_not_i64(tcg_rd, tcg_rm);
2979
            if (!sf) {
2980
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2981
            }
2982
        } else {
2983
            if (sf) {
2984
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
2985
            } else {
2986
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
2987
            }
2988
        }
2989
        return;
2990
    }
2991

    
2992
    tcg_rm = read_cpu_reg(s, rm, sf);
2993

    
2994
    if (shift_amount) {
2995
        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
2996
    }
2997

    
2998
    tcg_rn = cpu_reg(s, rn);
2999

    
3000
    switch (opc | (invert << 2)) {
3001
    case 0: /* AND */
3002
    case 3: /* ANDS */
3003
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3004
        break;
3005
    case 1: /* ORR */
3006
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3007
        break;
3008
    case 2: /* EOR */
3009
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3010
        break;
3011
    case 4: /* BIC */
3012
    case 7: /* BICS */
3013
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3014
        break;
3015
    case 5: /* ORN */
3016
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3017
        break;
3018
    case 6: /* EON */
3019
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3020
        break;
3021
    default:
3022
        assert(FALSE);
3023
        break;
3024
    }
3025

    
3026
    if (!sf) {
3027
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3028
    }
3029

    
3030
    if (opc == 3) {
3031
        gen_logic_CC(sf, tcg_rd);
3032
    }
3033
}
3034

    
3035
/*
3036
 * C3.5.1 Add/subtract (extended register)
3037
 *
3038
 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3039
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3040
 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3041
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3042
 *
3043
 *  sf: 0 -> 32bit, 1 -> 64bit
3044
 *  op: 0 -> add  , 1 -> sub
3045
 *   S: 1 -> set flags
3046
 * opt: 00
3047
 * option: extension type (see DecodeRegExtend)
3048
 * imm3: optional shift to Rm
3049
 *
3050
 * Rd = Rn + LSL(extend(Rm), amount)
3051
 */
3052
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3053
{
3054
    int rd = extract32(insn, 0, 5);
3055
    int rn = extract32(insn, 5, 5);
3056
    int imm3 = extract32(insn, 10, 3);
3057
    int option = extract32(insn, 13, 3);
3058
    int rm = extract32(insn, 16, 5);
3059
    bool setflags = extract32(insn, 29, 1);
3060
    bool sub_op = extract32(insn, 30, 1);
3061
    bool sf = extract32(insn, 31, 1);
3062

    
3063
    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3064
    TCGv_i64 tcg_rd;
3065
    TCGv_i64 tcg_result;
3066

    
3067
    if (imm3 > 4) {
3068
        unallocated_encoding(s);
3069
        return;
3070
    }
3071

    
3072
    /* non-flag setting ops may use SP */
3073
    if (!setflags) {
3074
        tcg_rn = read_cpu_reg_sp(s, rn, sf);
3075
        tcg_rd = cpu_reg_sp(s, rd);
3076
    } else {
3077
        tcg_rn = read_cpu_reg(s, rn, sf);
3078
        tcg_rd = cpu_reg(s, rd);
3079
    }
3080

    
3081
    tcg_rm = read_cpu_reg(s, rm, sf);
3082
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3083

    
3084
    tcg_result = tcg_temp_new_i64();
3085

    
3086
    if (!setflags) {
3087
        if (sub_op) {
3088
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3089
        } else {
3090
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3091
        }
3092
    } else {
3093
        if (sub_op) {
3094
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3095
        } else {
3096
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3097
        }
3098
    }
3099

    
3100
    if (sf) {
3101
        tcg_gen_mov_i64(tcg_rd, tcg_result);
3102
    } else {
3103
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3104
    }
3105

    
3106
    tcg_temp_free_i64(tcg_result);
3107
}
3108

    
3109
/*
3110
 * C3.5.2 Add/subtract (shifted register)
3111
 *
3112
 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3113
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3114
 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3115
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3116
 *
3117
 *    sf: 0 -> 32bit, 1 -> 64bit
3118
 *    op: 0 -> add  , 1 -> sub
3119
 *     S: 1 -> set flags
3120
 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3121
 *  imm6: Shift amount to apply to Rm before the add/sub
3122
 */
3123
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3124
{
3125
    int rd = extract32(insn, 0, 5);
3126
    int rn = extract32(insn, 5, 5);
3127
    int imm6 = extract32(insn, 10, 6);
3128
    int rm = extract32(insn, 16, 5);
3129
    int shift_type = extract32(insn, 22, 2);
3130
    bool setflags = extract32(insn, 29, 1);
3131
    bool sub_op = extract32(insn, 30, 1);
3132
    bool sf = extract32(insn, 31, 1);
3133

    
3134
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3135
    TCGv_i64 tcg_rn, tcg_rm;
3136
    TCGv_i64 tcg_result;
3137

    
3138
    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3139
        unallocated_encoding(s);
3140
        return;
3141
    }
3142

    
3143
    tcg_rn = read_cpu_reg(s, rn, sf);
3144
    tcg_rm = read_cpu_reg(s, rm, sf);
3145

    
3146
    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3147

    
3148
    tcg_result = tcg_temp_new_i64();
3149

    
3150
    if (!setflags) {
3151
        if (sub_op) {
3152
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3153
        } else {
3154
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3155
        }
3156
    } else {
3157
        if (sub_op) {
3158
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3159
        } else {
3160
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3161
        }
3162
    }
3163

    
3164
    if (sf) {
3165
        tcg_gen_mov_i64(tcg_rd, tcg_result);
3166
    } else {
3167
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3168
    }
3169

    
3170
    tcg_temp_free_i64(tcg_result);
3171
}
3172

    
3173
/* C3.5.9 Data-processing (3 source)
3174

3175
   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3176
  +--+------+-----------+------+------+----+------+------+------+
3177
  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3178
  +--+------+-----------+------+------+----+------+------+------+
3179

3180
 */
3181
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3182
{
3183
    int rd = extract32(insn, 0, 5);
3184
    int rn = extract32(insn, 5, 5);
3185
    int ra = extract32(insn, 10, 5);
3186
    int rm = extract32(insn, 16, 5);
3187
    int op_id = (extract32(insn, 29, 3) << 4) |
3188
        (extract32(insn, 21, 3) << 1) |
3189
        extract32(insn, 15, 1);
3190
    bool sf = extract32(insn, 31, 1);
3191
    bool is_sub = extract32(op_id, 0, 1);
3192
    bool is_high = extract32(op_id, 2, 1);
3193
    bool is_signed = false;
3194
    TCGv_i64 tcg_op1;
3195
    TCGv_i64 tcg_op2;
3196
    TCGv_i64 tcg_tmp;
3197

    
3198
    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3199
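    /* For example SMADDL is sf:op54:op31:o0 = 1:00:001:0 = 0x42 and
     * UMULH is 1:00:110:0 = 0x4c.
     */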
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}

/* C3.5.3 - Add/subtract (with carry)
 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
 * +--+--+--+------------------------+------+---------+------+-----+
 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
 * +--+--+--+------------------------+------+---------+------+-----+
 *                                            [000000]
 */

static void disas_adc_sbc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, setflags, rm, rn, rd;
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    if (extract32(insn, 10, 6) != 0) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    setflags = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

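    /* SBC computes Rn + NOT(Rm) + C, so inverting Rm up front lets the
     * same gen_adc/gen_adc_CC path implement both ADC and SBC.
     */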
    if (op) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
    } else {
        tcg_y = cpu_reg(s, rm);
    }

    if (setflags) {
        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
    }
}

/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    int label_continue = -1;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;

    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    if (cond < 0x0e) { /* not always */
        int label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_tmp = tcg_temp_new_i64();
        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
        gen_set_nzcv(tcg_tmp);
        tcg_temp_free_i64(tcg_tmp);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }
    /* match, or condition is always */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

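    /* The arithmetic result itself is discarded; only the NZCV side
     * effect of the comparison matters, so tcg_tmp is freed at once.
     */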
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    if (cond < 0x0e) { /* continue */
        gen_set_label(label_continue);
    }
}

/* C3.5.6 Conditional select
 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
 * +----+----+---+-----------------+------+------+-----+------+------+
 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
 * +----+----+---+-----------------+------+------+-----+------+------+
 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, tcg_src;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (rd == 31) {
        /* silly no-op write; until we use movcond we must special-case
         * this to avoid a dead temporary across basic blocks.
         */
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (cond >= 0x0e) { /* condition "always" */
        tcg_src = read_cpu_reg(s, rn, sf);
        tcg_gen_mov_i64(tcg_rd, tcg_src);
    } else {
        /* OPTME: we could use movcond here, at the cost of duplicating
         * a lot of the arm_gen_test_cc() logic.
         */
        int label_match = gen_new_label();
        int label_continue = gen_new_label();

        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_src = cpu_reg(s, rm);

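        /* else_inv/else_inc pick the transform applied to Rm when the
         * condition fails: CSEL (neither), CSINC (+1), CSINV (NOT), and
         * CSNEG, where invert-plus-increment is exactly negation.
         */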
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(tcg_rd, tcg_src);
        } else if (else_inv) {
            tcg_gen_not_i64(tcg_rd, tcg_src);
        } else if (else_inc) {
            tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
        } else {
            tcg_gen_mov_i64(tcg_rd, tcg_src);
        }
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        tcg_gen_br(label_continue);
        /* match: */
        gen_set_label(label_match);
        tcg_src = read_cpu_reg(s, rn, sf);
        tcg_gen_mov_i64(tcg_rd, tcg_src);
        /* continue: */
        gen_set_label(label_continue);
    }
}

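/* The 32-bit variants of the bit-operation helpers below all follow
 * the same pattern: truncate Xn to 32 bits, run the 32-bit helper,
 * then zero-extend the result back into the 64-bit destination.
 */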
static void handle_clz(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        gen_helper_clz64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
        gen_helper_clz(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_cls(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        gen_helper_cls64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_rbit(DisasContext *s, unsigned int sf,
                        unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        gen_helper_rbit64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
static void handle_rev64(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    if (!sf) {
        unallocated_encoding(s);
        return;
    }
    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
}

/* C5.6.149 REV with sf==0, opcode==2
 * C5.6.151 REV32 (sf==1, opcode==2)
 */
static void handle_rev32(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);

    if (sf) {
        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

        /* bswap32_i64 requires zero high word */
        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);

        tcg_temp_free_i64(tcg_tmp);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
    }
}

/* C5.6.150 REV16 (opcode==1) */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

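    /* Build the result one 16-bit lane at a time: byte-swap each
     * halfword of Rn and deposit it back into the matching lane of Rd.
     */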
    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);

    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);

    if (sf) {
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);

        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
    }

    tcg_temp_free_i64(tcg_tmp);
}

/* C3.5.7 Data-processing (1 source)
 *   31  30  29  28             21 20     16 15    10 9    5 4    0
 * +----+---+---+-----------------+---------+--------+------+------+
 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+---------+--------+------+------+
 */
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opcode, rn, rd;

    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0: /* RBIT */
        handle_rbit(s, sf, rn, rd);
        break;
    case 1: /* REV16 */
        handle_rev16(s, sf, rn, rd);
        break;
    case 2: /* REV32 */
        handle_rev32(s, sf, rn, rd);
        break;
    case 3: /* REV64 */
        handle_rev64(s, sf, rn, rd);
        break;
    case 4: /* CLZ */
        handle_clz(s, sf, rn, rd);
        break;
    case 5: /* CLS */
        handle_cls(s, sf, rn, rd);
        break;
    default:
        /* opcodes 6..63 are unallocated; flag them rather than
         * silently generating no code.
         */
        unallocated_encoding(s);
        break;
    }
}

static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
                       unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_n, tcg_m, tcg_rd;
    tcg_rd = cpu_reg(s, rd);

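    /* For 32-bit SDIV the operands must be sign-extended to 64 bits
     * so that the 64-bit division helper yields the right signed
     * quotient.
     */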
    if (!sf && is_signed) {
        tcg_n = new_tmp_a64(s);
        tcg_m = new_tmp_a64(s);
        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
    } else {
        tcg_n = read_cpu_reg(s, rn, sf);
        tcg_m = read_cpu_reg(s, rm, sf);
    }

    if (is_signed) {
        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
    } else {
        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
static void handle_shift_reg(DisasContext *s,
                             enum a64_shift_type shift_type, unsigned int sf,
                             unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_shift = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

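    /* The architecture takes the shift amount from Rm modulo the
     * register width, hence the mask with 63 (64-bit) or 31 (32-bit).
     */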
    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
    tcg_temp_free_i64(tcg_shift);
}

/* C3.5.8 Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd;
    sf = extract32(insn, 31, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.5 Data processing - register */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 5)) {
    case 0x0a: /* Logical (shifted register) */
        disas_logic_reg(s, insn);
        break;
    case 0x0b: /* Add/subtract */
        if (insn & (1 << 21)) { /* (extended register) */
            disas_add_sub_ext_reg(s, insn);
        } else {
            disas_add_sub_reg(s, insn);
        }
        break;
    case 0x1b: /* Data-processing (3 source) */
        disas_data_proc_3src(s, insn);
        break;
    case 0x1a:
        switch (extract32(insn, 21, 3)) {
        case 0x0: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;
        case 0x2: /* Conditional compare */
            disas_cc(s, insn); /* both imm and reg forms */
            break;
        case 0x4: /* Conditional select */
            disas_cond_select(s, insn);
            break;
        case 0x6: /* Data-processing */
            if (insn & (1 << 30)) { /* (1 source) */
                disas_data_proc_1src(s, insn);
            } else {            /* (2 source) */
                disas_data_proc_2src(s, insn);
            }
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

static void handle_fp_compare(DisasContext *s, bool is_double,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn, tcg_vm;

        tcg_vn = read_fp_sreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i32(0);
        } else {
            tcg_vm = read_fp_sreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

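    /* The compare helpers return NZCV in bits [31:28] of their
     * result, which is exactly the layout gen_set_nzcv() expects.
     */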
    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}

/* C3.6.22 Floating point compare
 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 */
static void disas_fp_compare(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, op, rn, opc, op2r;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    op = extract32(insn, 14, 2);
    rn = extract32(insn, 5, 5);
    opc = extract32(insn, 3, 2);
    op2r = extract32(insn, 0, 3);

    if (mos || op || op2r || type > 1) {
        unallocated_encoding(s);
        return;
    }

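    /* opc<0> selects the compare-with-zero form and opc<1> the
     * signalling form (FCMPE), which raises Invalid Operation for
     * quiet NaNs as well.
     */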
    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
}

/* C3.6.23 Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    int label_continue = -1;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (cond < 0x0e) { /* not always */
        int label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_flags = tcg_const_i64(nzcv << 28);
        gen_set_nzcv(tcg_flags);
        tcg_temp_free_i64(tcg_flags);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, type, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}

/* copy src FP register to dst FP register; type specifies single or double */
static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
{
    if (type) {
        TCGv_i64 v = read_fp_dreg(s, src);
        write_fp_dreg(s, dst, v);
        tcg_temp_free_i64(v);
    } else {
        TCGv_i32 v = read_fp_sreg(s, src);
        write_fp_sreg(s, dst, v);
        tcg_temp_free_i32(v);
    }
}

/* C3.6.24 Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    int label_continue = -1;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (cond < 0x0e) { /* not always */
        int label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        gen_mov_fp2fp(s, type, rd, rm);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    gen_mov_fp2fp(s, type, rd, rn);

    if (cond < 0x0e) { /* continue */
        gen_set_label(label_continue);
    }
}

/* C3.6.25 Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i32 tcg_op;
    TCGv_i32 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

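        /* set_rmode installs the requested rounding mode in the fp
         * status word and returns the old mode, so the second call
         * below restores whatever was in effect before.
         */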
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rints(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        gen_helper_rints(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}

/* C3.6.25 Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i64 tcg_op;
    TCGv_i64 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i64(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rintd(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}

static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    case 0x1:
    {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        }
        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        break;
    }
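    /* Half-precision source: the value lives in the low 16 bits of
     * the S register, so clear any stale upper bits before converting.
     */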
    case 0x3:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    default:
        abort();
    }
}

/* C3.6.25 Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
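        /* 'type' is the source precision and 'dtype' the destination,
         * each 0 = single, 1 = double, 3 = half; type 2 is reserved,
         * and a same-to-same "conversion" is likewise unallocated.
         */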
        if (type == 2 || dtype == type) {
            unallocated_encoding(s);
            return;
        }
        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        default:
            unallocated_encoding(s);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.6.26 Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
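    /* FNMUL is just -(Rn * Rm): unlike the FNMADD/FNMSUB fused ops
     * there is no accumulate step, so multiply and then flip the sign.
     */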
    case 0x8: /* FNMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}

/* C3.6.26 Floating-point data-processing (2 source) - double precision */
static void handle_fp_2src_double(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i64();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negd(tcg_res, tcg_res);
        break;
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res);
}

/* C3.6.26 Floating point data-processing (2 source)
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_fp_2src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int opcode = extract32(insn, 12, 4);

    if (opcode > 8) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        handle_fp_2src_single(s, opcode, rd, rn, rm);
        break;
    case 1:
        handle_fp_2src_double(s, opcode, rd, rn, rm);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* C3.6.27 Floating-point data-processing (3 source) - single precision */
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i32 tcg_res = tcg_temp_new_i32();
    TCGv_ptr fpst = get_fpstatus_ptr();

    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);
    tcg_op3 = read_fp_sreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
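    /* o1:o0 distinguish FMADD (00), FMSUB (01), FNMADD (10) and
     * FNMSUB (11): o1 negates the accumulator Ra, and o0 != o1
     * negates the product.
     */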
    if (o1) {
        gen_helper_vfp_negs(tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negs(tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_op3);
    tcg_temp_free_i32(tcg_res);
}

/* C3.6.27 Floating-point data-processing (3 source) - double precision */
static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i64 tcg_res = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);
    tcg_op3 = read_fp_dreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1) {
        gen_helper_vfp_negd(tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negd(tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_op3);
    tcg_temp_free_i64(tcg_res);
}

/* C3.6.27 Floating point data-processing (3 source)
 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 */
static void disas_fp_3src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    bool o0 = extract32(insn, 15, 1);
    bool o1 = extract32(insn, 21, 1);

    switch (type) {
    case 0:
        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
        break;
    case 1:
        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* C3.6.28 Floating point immediate
 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 */
static void disas_fp_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int imm8 = extract32(insn, 13, 8);
    int is_double = extract32(insn, 22, 2);
    uint64_t imm;
    TCGv_i64 tcg_res;

    if (is_double > 1) {
        unallocated_encoding(s);
        return;
    }

    /* The imm8 encodes the sign bit, enough bits to represent
     * an exponent in the range 01....1xx to 10....0xx,
     * and the most significant 4 bits of the mantissa; see
     * VFPExpandImm() in the v8 ARM ARM.
     */
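    /* For example, imm8 = 0x70 (sign 0, imm8<6> = 1, low bits 110000)
     * expands to 0x3f800000 (single) or 0x3ff0000000000000 (double),
     * i.e. 1.0.
     */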
    if (is_double) {
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
    } else {
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
    }

    tcg_res = tcg_const_i64(imm);
    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_res);
}

/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr();

    tcg_shift = tcg_const_i32(64 - scale);

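    /* The helpers take the number of fractional bits, which is
     * 64 - scale; plain integer conversions pass scale == 64 so the
     * shift degenerates to zero.
     */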
    if (itof) {
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
        }

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);

        if (!sf) {
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}

/* C3.6.29 Floating point <-> fixed point conversions
 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 */
static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int scale = extract32(insn, 10, 6);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof;

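    /* For the 32-bit forms the number of fraction bits is 64 - scale
     * and must not exceed 32, hence the scale >= 32 requirement.
     */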
    if (sbit || (type > 1)
        || (!sf && scale < 32)) {
        unallocated_encoding(s);
        return;
    }

    switch ((rmode << 3) | opcode) {
    case 0x2: /* SCVTF */
    case 0x3: /* UCVTF */
        itof = true;
        break;
    case 0x18: /* FCVTZS */
    case 0x19: /* FCVTZU */
        itof = false;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
}

static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     */

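    /* Writes to the low 32 or 64 bits of a V register must clear the
     * remaining bits, hence the explicit zero stores below; only the
     * "to top half" form leaves the low half untouched.
     */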
    if (itof) {
        TCGv_i64 tcg_rn = cpu_reg(s, rn);

        switch (type) {
        case 0:
        {
            /* 32 bit */
            TCGv_i64 tmp = tcg_temp_new_i64();
            tcg_gen_ext32u_i64(tmp, tcg_rn);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
            tcg_gen_movi_i64(tmp, 0);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 1:
        {
            /* 64 bit */
            TCGv_i64 tmp = tcg_const_i64(0);
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 2:
            /* 64 bit to top half. */
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
            break;
        }
    } else {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit */
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
            break;
        }
    }
}

/* C3.6.30 Floating point <-> integer conversions
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 */
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);

    if (sbit) {
        unallocated_encoding(s);
        return;
    }

    if (opcode > 5) {
        /* FMOV */
        bool itof = opcode & 1;

        if (rmode >= 2) {
            unallocated_encoding(s);
            return;
        }

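        /* The valid combinations are W<->S (0x0), X<->D (0xa) and
         * X<->top half of the 128-bit register (0xd).
         */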
        switch (sf << 3 | type << 1 | rmode) {
        case 0x0: /* 32 bit */
        case 0xa: /* 64 bit */
        case 0xd: /* 64 bit to top half of quad */
            break;
        default:
            /* all other sf/type/rmode combinations are invalid */
            unallocated_encoding(s);
            return;
        }

        handle_fmov(s, rd, rn, type, itof);
    } else {
        /* actual FP conversions */
        bool itof = extract32(opcode, 1, 1);

        if (type > 1 || (rmode != 0 && opcode > 1)) {
            unallocated_encoding(s);
            return;
        }

        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
    }
}

/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
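            /* Decode on the lowest set bit of insn[15:12]: the shape
             * of these encodings means that is enough to separate the
             * immediate, compare, 1-source and int-conv groups.
             */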
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}

static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
                     int pos)
{
    /* Extract 64 bits from the middle of two concatenated 64 bit
     * vector register slices left:right. The extracted bits start
     * at 'pos' bits into the right (least significant) side.
     * We return the result in tcg_right, and guarantee not to
     * trash tcg_left.
     */
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    assert(pos > 0 && pos < 64);

    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);

    tcg_temp_free_i64(tcg_tmp);
}

/* C3.6.1 EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3;
    TCGv_i64 tcg_resl, tcg_resh;

    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
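    /* e.g. for the 64-bit variant, imm4 = 3 gives pos = 24, so Vd
     * becomes bits [87:24] of the concatenation Vm:Vn (Vn being the
     * least significant half).
     */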
    if (!is_q) {
4810
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
4811
        if (pos != 0) {
4812
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
4813
            do_ext64(s, tcg_resh, tcg_resl, pos);
4814
        }
4815
        tcg_gen_movi_i64(tcg_resh, 0);
4816
    } else {
4817
        TCGv_i64 tcg_hh;
4818
        typedef struct {
4819
            int reg;
4820
            int elt;
4821
        } EltPosns;
4822
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
4823
        EltPosns *elt = eltposns;
4824

    
4825
        if (pos >= 64) {
4826
            elt++;
4827
            pos -= 64;
4828
        }
4829

    
4830
        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
4831
        elt++;
4832
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
4833
        elt++;
4834
        if (pos != 0) {
4835
            do_ext64(s, tcg_resh, tcg_resl, pos);
4836
            tcg_hh = tcg_temp_new_i64();
4837
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
4838
            do_ext64(s, tcg_hh, tcg_resh, pos);
4839
            tcg_temp_free_i64(tcg_hh);
4840
        }
4841
    }
4842

    
4843
    write_vec_element(s, tcg_resl, rd, 0, MO_64);
4844
    tcg_temp_free_i64(tcg_resl);
4845
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
4846
    tcg_temp_free_i64(tcg_resh);
4847
}
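
/* For illustration: a quad EXT with imm4 == 9 has pos == 72, so the
 * element walk starts at {rn, 1} with pos reduced to 8; resl then
 * becomes (Vn[1] >> 8) | (Vm[0] << 56) and resh becomes
 * (Vm[0] >> 8) | (Vm[1] << 56), i.e. the 128 bit window starting at
 * byte 9 of the Vm:Vn concatenation.
 */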

/* C3.6.2 TBL/TBX
 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 */
static void disas_simd_tb(DisasContext *s, uint32_t insn)
{
    int op2 = extract32(insn, 22, 2);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int is_tblx = extract32(insn, 12, 1);
    int len = extract32(insn, 13, 2);
    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
    TCGv_i32 tcg_regno, tcg_numregs;

    if (op2 != 0) {
        unallocated_encoding(s);
        return;
    }

    /* This does a table lookup: for every byte element in the input
     * we index into a table formed from up to four vector registers,
     * and then the output is the result of the lookups. Our helper
     * function does the lookup operation for a single 64 bit part of
     * the input.
     */
    tcg_resl = tcg_temp_new_i64();
    tcg_resh = tcg_temp_new_i64();

    if (is_tblx) {
        read_vec_element(s, tcg_resl, rd, 0, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_resl, 0);
    }
    if (is_tblx && is_q) {
        read_vec_element(s, tcg_resh, rd, 1, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_resh, 0);
    }

    tcg_idx = tcg_temp_new_i64();
    tcg_regno = tcg_const_i32(rn);
    tcg_numregs = tcg_const_i32(len + 1);
    read_vec_element(s, tcg_idx, rm, 0, MO_64);
    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
                        tcg_regno, tcg_numregs);
    if (is_q) {
        read_vec_element(s, tcg_idx, rm, 1, MO_64);
        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
                            tcg_regno, tcg_numregs);
    }
    tcg_temp_free_i64(tcg_idx);
    tcg_temp_free_i32(tcg_regno);
    tcg_temp_free_i32(tcg_numregs);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
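
/* For illustration: with len == 1 the table is the 32 bytes of
 * Vn:V(n+1). An index byte of 5 in Vm selects byte 5 of Vn; an index
 * of 32 or more yields 0 for TBL but leaves the destination byte
 * unchanged for TBX, which is why TBX preloads tcg_resl/tcg_resh
 * from Vd above.
 */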

/* C3.6.3 ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+------------------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+------------------+------+
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    tcg_resl = tcg_const_i64(0);
    tcg_resh = tcg_const_i64(0);
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
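
/* For illustration: ZIP1.8B (opcode 3, part 0, 8 elements) computes
 *   Vd = { Vn[0], Vm[0], Vn[1], Vm[1], Vn[2], Vm[2], Vn[3], Vm[3] }
 * matching the loop above, where even result lanes i come from
 * Vn[base + i/2] and odd lanes from Vm[base + i/2], with base == 0
 * for the "1" variants.
 */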

static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
                        int opc, bool is_min, TCGv_ptr fpst)
{
    /* Helper function for disas_simd_across_lanes: do a single precision
     * min/max operation on the specified two inputs,
     * and return the result in tcg_elt1.
     */
    if (opc == 0xc) {
        if (is_min) {
            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    } else {
        assert(opc == 0xf);
        if (is_min) {
            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    }
}

/* C3.6.4 AdvSIMD across lanes
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_fp = false;
    bool is_min = false;
    int esize;
    int elements;
    int i;
    TCGv_i64 tcg_res, tcg_elt;

    switch (opcode) {
    case 0x1b: /* ADDV */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
        if (size == 3 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        if (!is_u || !is_q || extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        /* Bit 1 of size field encodes min vs max, and actual size is always
         * 32 bits: adjust the size variable so following code can rely on it
         */
        is_min = extract32(size, 1, 1);
        is_fp = true;
        size = 2;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    esize = 8 << size;
    elements = (is_q ? 128 : 64) / esize;

    tcg_res = tcg_temp_new_i64();
    tcg_elt = tcg_temp_new_i64();

    /* These instructions operate across all lanes of a vector
     * to produce a single result. We can guarantee that a 64
     * bit intermediate is sufficient:
     *  + for [US]ADDLV the maximum element size is 32 bits, and
     *    the result type is 64 bits
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
     *    same as the element size, which is 32 bits at most
     * For the integer operations we can choose to work at 64
     * or 32 bits and truncate at the end; for simplicity
     * we use 64 bits always. The floating point
     * ops do require 32 bit intermediates, though.
     */
    if (!is_fp) {
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));

        for (i = 1; i < elements; i++) {
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));

            switch (opcode) {
            case 0x03: /* SADDLV / UADDLV */
            case 0x1b: /* ADDV */
                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
                break;
            case 0x0a: /* SMAXV / UMAXV */
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_res,
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
                break;
            case 0x1a: /* SMINV / UMINV */
                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
                                    tcg_res,
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
                break;
            default:
                g_assert_not_reached();
            }
        }
    } else {
        /* Floating point ops which work on 32 bit (single) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
        TCGv_ptr fpst = get_fpstatus_ptr();

        assert(esize == 32);
        assert(elements == 4);

        read_vec_element(s, tcg_elt, rn, 0, MO_32);
        tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt);
        read_vec_element(s, tcg_elt, rn, 1, MO_32);
        tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);

        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);

        read_vec_element(s, tcg_elt, rn, 2, MO_32);
        tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
        read_vec_element(s, tcg_elt, rn, 3, MO_32);
        tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt);

        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);

        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);

        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
        tcg_temp_free_i32(tcg_elt1);
        tcg_temp_free_i32(tcg_elt2);
        tcg_temp_free_i32(tcg_elt3);
        tcg_temp_free_ptr(fpst);
    }

    tcg_temp_free_i64(tcg_elt);

    /* Now truncate the result to the width required for the final output */
    if (opcode == 0x03) {
        /* SADDLV, UADDLV: result is 2*esize */
        size++;
    }

    switch (size) {
    case 0:
        tcg_gen_ext8u_i64(tcg_res, tcg_res);
        break;
    case 1:
        tcg_gen_ext16u_i64(tcg_res, tcg_res);
        break;
    case 2:
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_res);
}
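
/* For illustration: for the FP ops the reduction above is the pairwise
 * tree max(max(elt[0], elt[1]), max(elt[2], elt[3])) rather than a
 * left-to-right fold; the difference is observable when NaNs are
 * involved, which is why the pseudocode ordering is followed exactly.
 */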

/* C6.3.31 DUP (Element, Vector)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    int size = ctz32(imm5);
    int esize = 8 << size;
    int elements = (is_q ? 128 : 64) / esize;
    int index, i;
    TCGv_i64 tmp;

    if (size > 3 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    index = imm5 >> (size + 1);

    tmp = tcg_temp_new_i64();
    read_vec_element(s, tmp, rn, index, size);

    for (i = 0; i < elements; i++) {
        write_vec_element(s, tmp, rd, i, size);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }

    tcg_temp_free_i64(tmp);
}
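
/* For illustration: imm5 == 0b01010 gives size == ctz32(imm5) == 1
 * (16 bit elements) and index == imm5 >> 2 == 2, i.e. broadcast lane 2
 * of the source as a vector of halfwords.
 */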

/* C6.3.31 DUP (element, scalar)
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 */
static void handle_simd_dupes(DisasContext *s, int rd, int rn,
                              int imm5)
{
    int size = ctz32(imm5);
    int index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    index = imm5 >> (size + 1);

    /* This instruction just extracts the specified element and
     * zero-extends it into the bottom of the destination register.
     */
    tmp = tcg_temp_new_i64();
    read_vec_element(s, tmp, rn, index, size);
    write_fp_dreg(s, rd, tmp);
    tcg_temp_free_i64(tmp);
}

/* C6.3.32 DUP (General)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    int size = ctz32(imm5);
    int esize = 8 << size;
    int elements = (is_q ? 128 : 64) / esize;
    int i = 0;

    if (size > 3 || ((size == 3) && !is_q)) {
        unallocated_encoding(s);
        return;
    }
    for (i = 0; i < elements; i++) {
        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* C6.3.150 INS (Element)
 *
 *  31                   21 20    16 15  14    11  10 9    5 4    0
 * +-----------------------+--------+------------+---+------+------+
 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
 * +-----------------------+--------+------------+---+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_inse(DisasContext *s, int rd, int rn,
                             int imm4, int imm5)
{
    int size = ctz32(imm5);
    int src_index, dst_index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }
    dst_index = extract32(imm5, 1 + size, 5);
    src_index = extract32(imm4, size, 4);

    tmp = tcg_temp_new_i64();

    read_vec_element(s, tmp, rn, src_index, size);
    write_vec_element(s, tmp, rd, dst_index, size);

    tcg_temp_free_i64(tmp);
}

/* C6.3.151 INS (General)
 *
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
{
    int size = ctz32(imm5);
    int idx;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    idx = extract32(imm5, 1 + size, 4 - size);
    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
}

/*
 * C6.3.321 UMOV (General)
 * C6.3.237 SMOV (General)
 *
 *  31  30   29              21 20    16 15    12   10 9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * U: unsigned when set
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
{
    int size = ctz32(imm5);
    int element;
    TCGv_i64 tcg_rd;

    /* Check for UnallocatedEncodings */
    if (is_signed) {
        if (size > 2 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        if (size > 3
            || (size < 3 && is_q)
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    }
    element = extract32(imm5, 1 + size, 4);

    tcg_rd = cpu_reg(s, rd);
    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
    if (is_signed && !is_q) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

/* C3.6.5 AdvSIMD copy
 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
 * +---+---+----+-----------------+------+---+------+---+------+------+
 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
 * +---+---+----+-----------------+------+---+------+---+------+------+
 */
static void disas_simd_copy(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm4 = extract32(insn, 11, 4);
    int op = extract32(insn, 29, 1);
    int is_q = extract32(insn, 30, 1);
    int imm5 = extract32(insn, 16, 5);

    if (op) {
        if (is_q) {
            /* INS (element) */
            handle_simd_inse(s, rd, rn, imm4, imm5);
        } else {
            unallocated_encoding(s);
        }
    } else {
        switch (imm4) {
        case 0:
            /* DUP (element - vector) */
            handle_simd_dupe(s, is_q, rd, rn, imm5);
            break;
        case 1:
            /* DUP (general) */
            handle_simd_dupg(s, is_q, rd, rn, imm5);
            break;
        case 3:
            if (is_q) {
                /* INS (general) */
                handle_simd_insg(s, rd, rn, imm5);
            } else {
                unallocated_encoding(s);
            }
            break;
        case 5:
        case 7:
            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
    }
}

/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            if (is_neg) {
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }

    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(imm);
    tcg_rd = new_tmp_a64(s);

    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_imm);
}
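
/* For illustration: cmode == 0b0010 (cmode_3_1 == 1) with
 * abcdefgh == 0xab expands as Replicate(Zeros(16):imm8:Zeros(8), 2),
 * giving imm == 0x0000ab000000ab00; the MVNI/BIC forms then invert
 * this value via the is_neg path above before it is applied.
 */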

/* C3.6.7 AdvSIMD scalar copy
 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
 * +-----+----+-----------------+------+---+------+---+------+------+
 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
 * +-----+----+-----------------+------+---+------+---+------+------+
 */
static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm4 = extract32(insn, 11, 4);
    int imm5 = extract32(insn, 16, 5);
    int op = extract32(insn, 29, 1);

    if (op != 0 || imm4 != 0) {
        unallocated_encoding(s);
        return;
    }

    /* DUP (element, scalar) */
    handle_simd_dupes(s, rd, rn, imm5);
}

/* C3.6.8 AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        TCGV_UNUSED_PTR(fpst);
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        size = extract32(size, 0, 1) ? 3 : 2;
        fpst = get_fpstatus_ptr();
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);

        switch (opcode) {
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}

/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This handles the common shift logic and is used by both
 * the vector and scalar code.
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_zero);
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}
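
/* For illustration: the rounding path above computes
 *   res = (src + (1 << (shift - 1))) >> shift
 * e.g. URSHR #3 on 12 gives (12 + 4) >> 3 == 2 instead of the
 * truncating 12 >> 3 == 1; the extended >64 bit path is only needed
 * for size == 3, where src plus the round constant can carry out of
 * 64 bits.
 */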

/* Common SHL/SLI - Shift left with an optional insert */
static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                 bool insert, int shift)
{
    if (insert) { /* SLI */
        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
    } else { /* SHL */
        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
    }
}
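
/* For illustration: SLI #8 on a 64 bit value is the deposit above,
 *   res = (res & 0xff) | (src << 8)
 * i.e. the bottom 'shift' bits of the existing destination survive and
 * the shifted source supplies the rest, whereas SHL simply discards
 * the destination.
 */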

/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    const int size = 3;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    }

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                            accumulate, is_u, size, shift);

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
}

/* SHL/SLI - Scalar shift left */
static void handle_scalar_simd_shli(DisasContext *s, bool insert,
                                    int immh, int immb, int opcode,
                                    int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    /* Declared only: both temps are assigned below, so allocating
     * them up front would just leak the initial allocations.
     */
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
}

/* C3.6.9 AdvSIMD scalar shift by immediate
 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +-----+---+-------------+------+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-------------+------+------+--------+---+------+------+
 *
 * This is the scalar version, so it works on fixed size registers.
 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);

    switch (opcode) {
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA */
    case 0x04: /* SRSHR / URSHR */
    case 0x06: /* SRSRA / URSRA */
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
        break;
    default:
        unsupported_encoding(s, insn);
        break;
    }
}

/* C3.6.10 AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 2) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    }
}
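
/* For illustration: the "doubling" in SQDMULL/SQDMLAL/SQDMLSL above is
 * implemented as a saturating add of the product to itself; e.g. for
 * 16 bit inputs 0x4000 * 0x4000 == 0x10000000 doubles to 0x20000000,
 * and only INT16_MIN * INT16_MIN saturates the 32 bit result.
 */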

static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    /* Handle 64x64->64 opcodes which are shared between the scalar
     * and vector 3-same groups. We cover every opcode where size == 3
     * is valid in either the three-reg-same (integer, not pairwise)
     * or scalar-three-reg-same groups. (Some opcodes are not yet
     * implemented.)
     */
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        /* CMTST : test is "if (X & Y != 0)". */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        if (u) {
            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
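
/* For illustration: tcg_gen_setcond_i64() produces 0 or 1, so the
 * following negation yields 0 or 0xffffffffffffffff, which is the
 * all-ones/all-zeros result the CM* comparisons require; CMEQ on equal
 * inputs produces -(1) == ~0ULL.
 */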

/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr();

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}
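
/* For illustration: FABD above has no dedicated helper; it is composed
 * as FSUB followed by FABS on the same fp status, i.e. abs(a - b)
 * with the usual rounding and exception behaviour of the other
 * arithmetic ops.
 */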

/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
6402
            break;
6403
        }
6404
        case 0x5: /* SQSUB, UQSUB */
6405
        {
6406
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
6407
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
6408
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
6409
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
6410
            };
6411
            genenvfn = fns[size][u];
6412
            break;
6413
        }
6414
        case 0x9: /* SQSHL, UQSHL */
6415
        {
6416
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
6417
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
6418
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
6419
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
6420
            };
6421
            genenvfn = fns[size][u];
6422
            break;
6423
        }
6424
        case 0xb: /* SQRSHL, UQRSHL */
6425
        {
6426
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
6427
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
6428
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
6429
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
6430
            };
6431
            genenvfn = fns[size][u];
6432
            break;
6433
        }
6434
        case 0x16: /* SQDMULH, SQRDMULH */
6435
        {
6436
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
6437
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
6438
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
6439
            };
6440
            assert(size == 1 || size == 2);
6441
            genenvfn = fns[size - 1][u];
6442
            break;
6443
        }
6444
        default:
6445
            g_assert_not_reached();
6446
        }
6447

    
6448
        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
6449
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
6450
        tcg_temp_free_i32(tcg_rd32);
6451
        tcg_temp_free_i32(tcg_rn);
6452
        tcg_temp_free_i32(tcg_rm);
6453
    }
6454

    
6455
    write_fp_dreg(s, rd, tcg_rd);
6456

    
6457
    tcg_temp_free_i64(tcg_rd);
6458
}
6459

    
6460
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
6461
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
6462
{
6463
    /* Handle 64->64 opcodes which are shared between the scalar and
6464
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
6465
     * is valid in either group and also the double-precision fp ops.
6466
     */
6467
    TCGCond cond;
6468

    
6469
    switch (opcode) {
6470
    case 0x5: /* NOT */
6471
        /* This opcode is shared with CNT and RBIT but we have earlier
6472
         * enforced that size == 3 if and only if this is the NOT insn.
6473
         */
6474
        tcg_gen_not_i64(tcg_rd, tcg_rn);
6475
        break;
6476
    case 0xa: /* CMLT */
6477
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(test)
         * and then negating.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
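            /* ABS: compute the negation, then use movcond to select
             * the operand or its negation, i.e. rd = (rn > 0) ? rn : -rn.
             */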
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
                                tcg_rn, tcg_rd);
            tcg_temp_free_i64(tcg_zero);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    default:
        g_assert_not_reached();
    }
}

static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                   bool is_scalar, bool is_u, bool is_q,
                                   int size, int rn, int rd)
{
    bool is_double = (size == 3);
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        NeonGenTwoDoubleOPFn *genfn;
        bool swap = false;
        int pass;

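        /* FCMLT and FCMLE are handled by swapping the operands and
         * reusing the GT/GE helpers: x < 0 is 0 > x, x <= 0 is 0 >= x.
         */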
        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fallthrough */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f64;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f64;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f64;
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_zero);
        tcg_temp_free_i64(tcg_op);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_zero = tcg_const_i32(0);
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        NeonGenTwoSingleOPFn *genfn;
        bool swap = false;
        int pass, maxpasses;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fall through */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f32;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f32;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f32;
            break;
        default:
            g_assert_not_reached();
        }

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_zero);
        tcg_temp_free_i32(tcg_op);
        if (!is_q && !is_scalar) {
            clear_vec_high(s, rd);
        }
    }

    tcg_temp_free_ptr(fpst);
}

/* C3.6.12 AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);

    switch (opcode) {
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x1d: /* SCVTF */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x56: /* FCVTXN, FCVTXN2 */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x5d: /* UCVTF */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        case 0x7d: /* FRSQRTE */
            unsupported_encoding(s, insn);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        /* Other categories of encoding in this class:
         *  + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
         *  + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
         *    narrowing saturate ops: size 64/32/16 -> 32/16/8
         */
        unsupported_encoding(s, insn);
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        /* ops where the size might not be 64 bits aren't implemented yet */
        g_assert_not_reached();
    }
}

/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    int dsize = is_q ? 128 : 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    TCGv_i64 tcg_round;
    int i;

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (size > 3 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    }

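    /* The rounding variants add a round constant of 1 << (shift - 1)
     * to the value before shifting, giving a round-to-nearest result.
     */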
    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, memop);
        if (accumulate) {
            read_vec_element(s, tcg_rd, rd, i, memop);
        }

        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);

        write_vec_element(s, tcg_rd, rd, i, size);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }

    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
}

/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = is_q ? 128 : 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    int i;

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (size > 3 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, size);
        if (insert) {
            read_vec_element(s, tcg_rd, rd, i, size);
        }

        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);

        write_vec_element(s, tcg_rd, rd, i, size);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* USHLL/SHLL - Vector shift left with widening */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    int i;

    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    /* For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

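    /* Each iteration shifts element i down into the low bits, widens it
     * (sign- or zero-extending via ext_and_shift_reg), applies the left
     * shift and writes it back at twice the source element size.
     */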
    for (i = 0; i < elements; i++) {
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}


/* C3.6.14 AdvSIMD shift by immediate
 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +---+---+---+-------------+------+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-------------+------+------+--------+---+------+------+
 */
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    switch (opcode) {
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA (accumulate) */
    case 0x04: /* SRSHR / URSHR (rounding) */
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x14: /* SSHLL / USHLL */
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    default:
        /* We don't currently implement any of the Narrow or saturating shifts;
         * nor do we implement the fixed-point conversions in this
         * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
         */
        unsupported_encoding(s, insn);
        return;
    }
}

/* Generate code to do a "long" addition or subtraction, ie one done in
 * TCGv_i64 on vector lanes twice the width specified by size.
 */
static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
{
    static NeonGenTwo64OpFn * const fns[3][2] = {
        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
        { tcg_gen_add_i64, tcg_gen_sub_i64 },
    };
    NeonGenTwo64OpFn *genfn;
    assert(size < 3);

    genfn = fns[size][is_sub];
    genfn(tcg_res, tcg_op1, tcg_op2);
}

static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
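                /* Absolute difference: compute both op1 - op2 and
                 * op2 - op1, then use movcond to pick the non-negative one.
                 */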
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
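                /* The 'doubling' part of these ops is done by
                 * saturating-adding the product to itself.
                 */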
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}

static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
                            int opcode, int rd, int rn, int rm)
{
    TCGv_i64 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
        static NeonGenWidenFn * const widenfns[3][2] = {
            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
        };
        NeonGenWidenFn *widenfn = widenfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
        widenfn(tcg_op2_wide, tcg_op2);
        tcg_temp_free_i32(tcg_op2);
        tcg_res[pass] = tcg_temp_new_i64();
        gen_neon_addl(size, (opcode == 3),
                      tcg_res[pass], tcg_op1, tcg_op2_wide);
        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2_wide);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}

static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_shri_i64(in, in, 32);
    tcg_gen_trunc_i64_i32(res, in);
}

static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
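    /* Round to nearest by adding half the weight of the discarded
     * low 32 bits before taking the high half.
     */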
    tcg_gen_addi_i64(in, in, 1U << 31);
    do_narrow_high_u32(res, in);
}

static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
                                 int opcode, int rd, int rn, int rm)
{
    TCGv_i32 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
        static NeonGenNarrowFn * const narrowfns[3][2] = {
            { gen_helper_neon_narrow_high_u8,
              gen_helper_neon_narrow_round_high_u8 },
            { gen_helper_neon_narrow_high_u16,
              gen_helper_neon_narrow_round_high_u16 },
            { do_narrow_high_u32, do_narrow_round_high_u32 },
        };
        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);

        tcg_res[pass] = tcg_temp_new_i32();
        gennarrow(tcg_res[pass], tcg_wideres);
        tcg_temp_free_i64(tcg_wideres);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* C3.6.15 AdvSIMD three different
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        if (is_u || size == 1 || size == 2) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        break;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}

/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    TCGv_i64 tcg_res[2];
    int pass;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        if (!is_u) {
            switch (size) {
            case 0: /* AND */
                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 1: /* BIC */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 2: /* ORR */
                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 3: /* ORN */
                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            }
        } else {
            if (size != 0) {
                /* B* ops need res loaded to operate on */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            }

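            /* The B* ops use the xor trick for bit selection:
             *   BSL: rd = rm ^ ((rn ^ rm) & rd)
             *   BIT: rd = rd ^ ((rn ^ rd) & rm)
             *   BIF: rd = rd ^ ((rn ^ rd) & ~rm)
             */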
            switch (size) {
            case 0: /* EOR */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 1: /* BSL bitwise select */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
                break;
            case 2: /* BIT, bitwise insert if true */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
                break;
            case 3: /* BIF, bitwise insert if false */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
                break;
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    if (!is_q) {
        tcg_gen_movi_i64(tcg_res[1], 0);
    }
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}

/* Helper functions for 32 bit comparisons */
static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
}

static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
}

static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
}

static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
}

/* Pairwise op subgroup of C3.6.16.
 *
 * This is called directly, or from disas_simd_3same_float for float pairwise
 * operations, where the opcode and size are calculated differently.
 */
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
                                   int size, int rn, int rm, int rd)
{
    TCGv_ptr fpst;
    int pass;

    /* Floating point operations need fpst */
    if (opcode >= 0x58) {
        fpst = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    /* These operations work on the concatenated rm:rn, with each pair of
     * adjacent elements being operated on to produce an element in the result.
     */
    if (size == 3) {
        TCGv_i64 tcg_res[2];

        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            int passreg = (pass == 0) ? rn : rm;

            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
            tcg_res[pass] = tcg_temp_new_i64();

            switch (opcode) {
            case 0x17: /* ADDP */
                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    } else {
        int maxpass = is_q ? 4 : 2;
        TCGv_i32 tcg_res[4];

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            int passreg = pass < (maxpass / 2) ? rn : rm;
            int passelt = (is_q && (pass & 1)) ? 2 : 0;

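            /* The first half of the passes reads its pairs from rn and
             * the second half from rm; on quad ops the odd passes read
             * the upper pair of elements.
             */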
            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (opcode) {
            case 0x17: /* ADDP */
            {
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_padd_u8,
                    gen_helper_neon_padd_u16,
                    tcg_gen_add_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x14: /* SMAXP, UMAXP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
                    { gen_max_s32, gen_max_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x15: /* SMINP, UMINP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
                    { gen_min_s32, gen_min_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            /* The FP operations are all on single floats (32 bit) */
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            /* The FP ops above were called directly; for integer ops call genfn now */
            if (genfn) {
                genfn(tcg_res[pass], tcg_op1, tcg_op2);
            }

            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
            tcg_temp_free_i32(tcg_res[pass]);
        }
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}

/* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;

    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (size == 3) {
        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x1: /* SQADD, UQADD */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x5: /* SQSUB, UQSUB */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x6: /* CMGT, CMHI */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x7: /* CMGE, CMHS */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x8: /* SSHL, USHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xc: /* SMAX, UMAX */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
                    { gen_max_s32, gen_max_u32 },
                };
                genfn = fns[size][u];
                break;
            }

            case 0xd: /* SMIN, UMIN */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
                    { gen_min_s32, gen_min_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xe: /* SABD, UABD */
            case 0xf: /* SABA, UABA */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x10: /* ADD, SUB */
            {
7837
                static NeonGenTwoOpFn * const fns[3][2] = {
7838
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
7839
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
7840
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
7841
                };
7842
                genfn = fns[size][u];
7843
                break;
7844
            }
7845
            case 0x11: /* CMTST, CMEQ */
7846
            {
7847
                static NeonGenTwoOpFn * const fns[3][2] = {
7848
                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
7849
                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
7850
                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
7851
                };
7852
                genfn = fns[size][u];
7853
                break;
7854
            }
7855
            case 0x13: /* MUL, PMUL */
7856
                if (u) {
7857
                    /* PMUL */
7858
                    assert(size == 0);
7859
                    genfn = gen_helper_neon_mul_p8;
7860
                    break;
7861
                }
7862
                /* fall through : MUL */
7863
            case 0x12: /* MLA, MLS */
7864
            {
7865
                static NeonGenTwoOpFn * const fns[3] = {
7866
                    gen_helper_neon_mul_u8,
7867
                    gen_helper_neon_mul_u16,
7868
                    tcg_gen_mul_i32,
7869
                };
7870
                genfn = fns[size];
7871
                break;
7872
            }
7873
            case 0x16: /* SQDMULH, SQRDMULH */
7874
            {
7875
                static NeonGenTwoOpEnvFn * const fns[2][2] = {
7876
                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7877
                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7878
                };
7879
                assert(size == 1 || size == 2);
7880
                genenvfn = fns[size - 1][u];
7881
                break;
7882
            }
7883
            default:
7884
                g_assert_not_reached();
7885
            }
7886

    
7887
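            /* The genenvfn helpers are the saturating ops; they take
             * cpu_env so they can set the cumulative saturation (QC)
             * flag as a side effect.
             */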
            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            if (opcode == 0xf || opcode == 0x12) {
                /* SABA, UABA, MLA, MLS: accumulating ops */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                bool is_sub = (opcode == 0x12 && u); /* MLS */

                genfn = fns[size][is_sub];
                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
                genfn(tcg_res, tcg_res, tcg_op1);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* C3.6.16 AdvSIMD three same
 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 11, 5);

    switch (opcode) {
    case 0x3: /* logic ops */
        disas_simd_3same_logic(s, insn);
        break;
    case 0x17: /* ADDP */
    case 0x14: /* SMAXP, UMAXP */
    case 0x15: /* SMINP, UMINP */
    {
        /* Pairwise operations */
        int is_q = extract32(insn, 30, 1);
        int u = extract32(insn, 29, 1);
        int size = extract32(insn, 22, 2);
        int rm = extract32(insn, 16, 5);
        int rn = extract32(insn, 5, 5);
        int rd = extract32(insn, 0, 5);
        if (opcode == 0x17) {
            if (u || (size == 3 && !is_q)) {
                unallocated_encoding(s);
                return;
            }
        } else {
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
        }
        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
        break;
    }
    case 0x18 ... 0x31:
        /* floating point ops, sz[1] and U are part of opcode */
        disas_simd_3same_float(s, insn);
        break;
    default:
        disas_simd_3same_int(s, insn);
        break;
    }
}

static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    int destelt = is_q ? 2 : 0;

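    /* destelt selects where the two 32-bit results land in Rd: the
     * "2" forms (is_q) write elements 2 and 3, i.e. the upper 64 bits,
     * while the base forms write elements 0 and 1 and then clear the
     * high half below.
     */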
    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        read_vec_element(s, tcg_op, rn, pass, MO_64);
        tcg_res[pass] = tcg_temp_new_i32();

        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_trunc_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }

        tcg_temp_free_i64(tcg_op);
    }

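    /* Write out the results only after both passes have been computed,
     * since Rd may be the same register as Rn.
     */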
    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

static void handle_rev(DisasContext *s, int opcode, bool u,
                       bool is_q, int size, int rn, int rd)
{
    int op = (opcode << 1) | u;
    int opsz = op + size;
    int grp_size = 3 - opsz;
    int dsize = is_q ? 128 : 64;
    int i;

    if (opsz >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (size == 0) {
        /* Special case bytes, use bswap op on each group of elements */
        int groups = dsize / (8 << grp_size);

        for (i = 0; i < groups; i++) {
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            read_vec_element(s, tcg_tmp, rn, i, grp_size);
            switch (grp_size) {
            case MO_16:
                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_32:
                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_64:
                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_tmp, rd, i, grp_size);
            tcg_temp_free_i64(tcg_tmp);
        }
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        int revmask = (1 << grp_size) - 1;
        int esize = 8 << size;
        int elements = dsize / esize;
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_const_i64(0);
        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);

        for (i = 0; i < elements; i++) {
            int e_rev = (i & 0xf) ^ revmask;
            int off = e_rev * esize;
            read_vec_element(s, tcg_rn, rn, i, size);
            if (off >= 64) {
                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
                                    tcg_rn, off - 64, esize);
            } else {
                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
            }
        }
        write_vec_element(s, tcg_rd, rd, 0, MO_64);
        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);

        tcg_temp_free_i64(tcg_rd_hi);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    }
}

/* C3.6.17 AdvSIMD two reg misc
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0x0: /* REV64, REV32 */
    case 0x1: /* REV16 */
        handle_rev(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x5: /* CNT, NOT, RBIT */
        if (u && size == 0) {
            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
            size = 3;
            break;
        } else if (u && size == 1) {
            /* RBIT */
            break;
        } else if (!u && size == 0) {
            /* CNT */
            break;
        }
        unallocated_encoding(s);
        return;
    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x2: /* SADDLP, UADDLP */
    case 0x4: /* CLS, CLZ */
    case 0x6: /* SADALP, UADALP */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    case 0x13: /* SHLL, SHLL2 */
        if (u == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    case 0xa: /* CMLT */
        if (u == 1) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x3: /* SUQADD, USQADD */
    case 0x7: /* SQABS, SQNEG */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
    {
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2f: /* FABS */
        case 0x6f: /* FNEG */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x16: /* FCVTN, FCVTN2 */
        case 0x17: /* FCVTL, FCVTL2 */
        case 0x18: /* FRINTN */
        case 0x19: /* FRINTM */
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x1d: /* SCVTF */
        case 0x38: /* FRINTP */
        case 0x39: /* FRINTZ */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x3c: /* URECPE */
        case 0x3d: /* FRECPE */
        case 0x56: /* FCVTXN, FCVTXN2 */
        case 0x58: /* FRINTA */
        case 0x59: /* FRINTX */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x5d: /* UCVTF */
        case 0x79: /* FRINTI */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        case 0x7c: /* URSQRTE */
        case 0x7d: /* FRSQRTE */
        case 0x7f: /* FSQRT */
            unsupported_encoding(s, insn);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        /* All 64-bit element operations can be shared with scalar 2misc */
        int pass;

        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            handle_2misc_64(s, opcode, u, tcg_res, tcg_op);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op);
        }
    } else {
        int pass;

        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            TCGCond cond;

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            if (size == 2) {
                /* Special cases for 32 bit elements */
                switch (opcode) {
                case 0xa: /* CMLT */
                    /* 32 bit integer comparison against zero, result is
                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
                     * and negating the result.
                     */
                    cond = TCG_COND_LT;
                do_cmop:
                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
                    tcg_gen_neg_i32(tcg_res, tcg_res);
                    break;
                case 0x8: /* CMGT, CMGE */
                    cond = u ? TCG_COND_GE : TCG_COND_GT;
                    goto do_cmop;
                case 0x9: /* CMEQ, CMLE */
                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
                    goto do_cmop;
                case 0xb: /* ABS, NEG */
                    if (u) {
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                    } else {
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
                                            tcg_zero, tcg_op, tcg_res);
                        tcg_temp_free_i32(tcg_zero);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_helper_vfp_abss(tcg_res, tcg_op);
                    break;
                case 0x6f: /* FNEG */
                    gen_helper_vfp_negs(tcg_res, tcg_op);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else {
                /* Use helpers for 8 and 16 bit elements */
                switch (opcode) {
                case 0x5: /* CNT, RBIT */
                    /* For these two insns size is part of the opcode specifier
                     * (handled earlier); they always operate on byte elements.
                     */
                    if (u) {
                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
                    } else {
                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
                    }
                    break;
                case 0x8: /* CMGT, CMGE */
                case 0x9: /* CMEQ, CMLE */
                case 0xa: /* CMLT */
                {
                    static NeonGenTwoOpFn * const fns[3][2] = {
                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
                    };
                    NeonGenTwoOpFn *genfn;
                    int comp;
                    bool reverse;
                    TCGv_i32 tcg_zero = tcg_const_i32(0);

                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
                    comp = (opcode - 0x8) * 2 + u;
                    /* ...but LE, LT are implemented as reverse GE, GT */
                    reverse = (comp > 2);
                    if (reverse) {
                        comp = 4 - comp;
                    }
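                    /* e.g. CMLE is opcode 0x9 with u set: comp = 3,
                     * reversed to CMGE (comp = 1) with swapped operands,
                     * computing 0 >= op, i.e. op <= 0.
                     */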
                    genfn = fns[comp][size];
                    if (reverse) {
                        genfn(tcg_res, tcg_zero, tcg_op);
                    } else {
                        genfn(tcg_res, tcg_op, tcg_zero);
                    }
                    tcg_temp_free_i32(tcg_zero);
                    break;
                }
                case 0xb: /* ABS, NEG */
                    if (u) {
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        if (size) {
                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
                        } else {
                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
                        }
                        tcg_temp_free_i32(tcg_zero);
                    } else {
                        if (size) {
                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
                        }
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op);
        }
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* C3.6.13 AdvSIMD scalar x indexed element
 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * C3.6.18 AdvSIMD vector x indexed element
 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 */
static void disas_simd_indexed(DisasContext *s, uint32_t insn)
{
    /* This encoding has two kinds of instruction:
     *  normal, where we perform elt x idxelt => elt for each
     *     element in the vector
     *  long, where we perform elt x idxelt and generate a result of
     *     double the width of the input element
     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
     */
    bool is_scalar = extract32(insn, 28, 1);
    bool is_q = extract32(insn, 30, 1);
    bool u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int l = extract32(insn, 21, 1);
    int m = extract32(insn, 20, 1);
    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
    int rm = extract32(insn, 16, 4);
    int opcode = extract32(insn, 12, 4);
    int h = extract32(insn, 11, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool is_long = false;
    bool is_fp = false;
    int index;
    TCGv_ptr fpst;

    switch (opcode) {
    case 0x0: /* MLA */
    case 0x4: /* MLS */
        if (!u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
        if (is_scalar) {
            unallocated_encoding(s);
            return;
        }
        is_long = true;
        break;
    case 0x3: /* SQDMLAL, SQDMLAL2 */
    case 0x7: /* SQDMLSL, SQDMLSL2 */
    case 0xb: /* SQDMULL, SQDMULL2 */
        is_long = true;
        /* fall through */
    case 0xc: /* SQDMULH */
    case 0xd: /* SQRDMULH */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x8: /* MUL */
        if (u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x1: /* FMLA */
    case 0x5: /* FMLS */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x9: /* FMUL, FMULX */
        if (!extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        is_fp = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_fp) {
        /* low bit of size indicates single/double */
        size = extract32(size, 0, 1) ? 3 : 2;
        if (size == 2) {
            index = h << 1 | l;
        } else {
            if (l || !is_q) {
                unallocated_encoding(s);
                return;
            }
            index = h;
        }
        rm |= (m << 4);
    } else {
        switch (size) {
        case 1:
            index = h << 2 | l << 1 | m;
            break;
        case 2:
            index = h << 1 | l;
            rm |= (m << 4);
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }

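    /* So for 16-bit elements the index is h:l:m (0..7) and Rm stays
     * 4 bits, while for 32-bit and 64-bit elements m becomes bit 4 of
     * Rm and the index is h:l (or just h for doubles).
     */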
    if (is_fp) {
        fpst = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    if (size == 3) {
        TCGv_i64 tcg_idx = tcg_temp_new_i64();
        int pass;

        assert(is_fp && is_q && !is_long);

        read_vec_element(s, tcg_idx, rm, index, MO_64);

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            switch (opcode) {
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
            tcg_temp_free_i64(tcg_res);
        }

        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_idx);
    } else if (!is_long) {
        /* 32 bit floating point, or 16 or 32 bit integer.
         * For the 16 bit scalar case we use the usual Neon helpers and
         * rely on the fact that 0 op 0 == 0 with no side effects.
         */
        TCGv_i32 tcg_idx = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        read_vec_element_i32(s, tcg_idx, rm, index, size);

        if (size == 1 && !is_scalar) {
            /* The simplest way to handle the 16x16 indexed ops is to duplicate
             * the index into both halves of the 32 bit tcg_idx and then use
             * the usual Neon helpers.
             */
            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
        }

        for (pass = 0; pass < maxpasses; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);

            switch (opcode) {
            case 0x0: /* MLA */
            case 0x4: /* MLS */
            case 0x8: /* MUL */
            {
                static NeonGenTwoOpFn * const fns[2][2] = {
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                NeonGenTwoOpFn *genfn;
                bool is_sub = opcode == 0x4;

                if (size == 1) {
                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
                } else {
                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
                }
                if (opcode == 0x8) {
                    break;
                }
                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
                genfn = fns[size - 1][is_sub];
                genfn(tcg_res, tcg_op, tcg_res);
                break;
            }
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            case 0xc: /* SQDMULH */
                if (size == 1) {
                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                }
                break;
            case 0xd: /* SQRDMULH */
                if (size == 1) {
                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
            tcg_temp_free_i32(tcg_res);
        }

        tcg_temp_free_i32(tcg_idx);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* long ops: 16x16->32 or 32x32->64 */
        TCGv_i64 tcg_res[2];
        int pass;
        bool satop = extract32(opcode, 0, 1);
        TCGMemOp memop = MO_32;

        if (satop || !u) {
            memop |= MO_SIGN;
        }

        if (size == 2) {
            TCGv_i64 tcg_idx = tcg_temp_new_i64();

            read_vec_element(s, tcg_idx, rm, index, memop);

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i64 tcg_op = tcg_temp_new_i64();
                TCGv_i64 tcg_passres;
                int passelt;

                if (is_scalar) {
                    passelt = 0;
                } else {
                    passelt = pass + (is_q * 2);
                }

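                /* For the INSN2 (upper-half) forms is_q is set and we
                 * read the source operands from elements 2 and 3 of Rn.
                 */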
                read_vec_element(s, tcg_op, rn, passelt, memop);

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
                tcg_temp_free_i64(tcg_op);

                if (satop) {
                    /* saturating, doubling */
                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i64(tcg_idx);

            if (is_scalar) {
                clear_vec_high(s, rd);
            }
        } else {
            TCGv_i32 tcg_idx = tcg_temp_new_i32();

            assert(size == 1);
            read_vec_element_i32(s, tcg_idx, rm, index, size);

            if (!is_scalar) {
                /* The simplest way to handle the 16x16 indexed ops is to
                 * duplicate the index into both halves of the 32 bit tcg_idx
                 * and then use the usual Neon helpers.
                 */
                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
            }

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i32 tcg_op = tcg_temp_new_i32();
                TCGv_i64 tcg_passres;

                if (is_scalar) {
                    read_vec_element_i32(s, tcg_op, rn, pass, size);
                } else {
                    read_vec_element_i32(s, tcg_op, rn,
                                         pass + (is_q * 2), MO_32);
                }

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                if (memop & MO_SIGN) {
                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
                }
                if (satop) {
                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }
                tcg_temp_free_i32(tcg_op);

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i32(tcg_idx);

            if (is_scalar) {
                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
            }
        }

        if (is_scalar) {
            tcg_res[1] = tcg_const_i64(0);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}

/* C3.6.19 Crypto AES
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
    unsupported_encoding(s, insn);
}

/* C3.6.20 Crypto three-reg SHA
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+---+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+---+--------+-----+------+------+
 */
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
    unsupported_encoding(s, insn);
}

/* C3.6.21 Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
    unsupported_encoding(s, insn);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 */
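/* An instruction matches a table entry when (insn & mask) == pattern;
 * the first match wins and the all-zeroes entry terminates the search.
 */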
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0x00000000, 0x00000000, NULL }
};

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
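    /* FP (non-vector) encodings have bit 28 set and bit 30 clear;
     * the rest of this space is SIMD, including the crypto insns.
     */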
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;

    insn = arm_ldl_code(env, s->pc, s->bswap_code);
    s->insn = insn;
    s->pc += 4;

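    /* The major opcode group is insn[28:25]; cf. the C3.1 index table. */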
    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe:      /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);
}

void gen_intermediate_code_internal_a64(ARMCPU *cpu,
                                        TranslationBlock *tb,
                                        bool search_pc)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    uint16_t *gen_opc_end;
    int j, lj;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;

    dc->tb = tb;

    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    dc->thumb = 0;
    dc->bswap_code = 0;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
#if !defined(CONFIG_USER_ONLY)
    dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
#endif
    dc->vfp_enabled = 0;
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->current_pl = arm_current_pl(env);

    init_tmp_a64_array(dc);

    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }

    gen_tb_start();

    tcg_clear_temp_count();

    do {
        if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
            QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_exception_insn(dc, 0, EXCP_DEBUG);
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    dc->pc += 2;
                    goto done_generating;
                }
            }
        }

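        /* In search_pc mode we are regenerating a TB to find the guest
         * PC of a fault, so record the mapping from opcode index to
         * guest PC and instruction count as we go.
         */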
        if (search_pc) {
            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
            if (lj < j) {
                lj++;
                while (lj < j) {
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
                }
            }
            tcg_ctx.gen_opc_pc[lj] = dc->pc;
            tcg_ctx.gen_opc_instr_start[lj] = 1;
            tcg_ctx.gen_opc_icount[lj] = num_insns;
        }

        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
            tcg_gen_debug_insn_start(dc->pc);
        }

        disas_a64_insn(env, dc);

        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.
         */
        num_insns++;
    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
             !cs->singlestep_enabled &&
             !singlestep &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        gen_io_end();
    }

    if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc->pc);
        }
        gen_exception(EXCP_DEBUG);
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_helper_wfi(cpu_env);
            break;
        }
    }

done_generating:
    gen_tb_end(tb, num_insns);
    *tcg_ctx.gen_opc_ptr = INDEX_op_end;

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(env, pc_start, dc->pc - pc_start,
                         4 | (dc->bswap_code << 1));
        qemu_log("\n");
    }
#endif
    if (search_pc) {
        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
        lj++;
        while (lj <= j) {
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
        }
    } else {
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}