Statistics
| Branch: | Revision:

root / target-arm / translate.c @ 6ddbc6e4

History | View | Annotate | Download (267.3 kB)

1
/*
2
 *  ARM translation
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *  Copyright (c) 2005-2007 CodeSourcery
6
 *  Copyright (c) 2007 OpenedHand, Ltd.
7
 *
8
 * This library is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2 of the License, or (at your option) any later version.
12
 *
13
 * This library is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with this library; if not, write to the Free Software
20
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
 */
22
#include <stdarg.h>
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
#include <inttypes.h>
27

    
28
#include "cpu.h"
29
#include "exec-all.h"
30
#include "disas.h"
31
#include "tcg-op.h"
32

    
33
#define GEN_HELPER 1
34
#include "helpers.h"
35

    
36
#define ENABLE_ARCH_5J    0
37
#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
38
#define ENABLE_ARCH_6K   arm_feature(env, ARM_FEATURE_V6K)
39
#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
40
#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)
41

    
42
#define ARCH(x) if (!ENABLE_ARCH_##x) goto illegal_op;
43

    
44
/* internal defines */
45
typedef struct DisasContext {
46
    target_ulong pc;
47
    int is_jmp;
48
    /* Nonzero if this instruction has been conditionally skipped.  */
49
    int condjmp;
50
    /* The label that will be jumped to when the instruction is skipped.  */
51
    int condlabel;
52
    /* Thumb-2 conditional execution bits.  */
53
    int condexec_mask;
54
    int condexec_cond;
55
    struct TranslationBlock *tb;
56
    int singlestep_enabled;
57
    int thumb;
58
    int is_mem;
59
#if !defined(CONFIG_USER_ONLY)
60
    int user;
61
#endif
62
} DisasContext;
63

    
64
#if defined(CONFIG_USER_ONLY)
65
#define IS_USER(s) 1
66
#else
67
#define IS_USER(s) (s->user)
68
#endif
69

    
70
/* These instructions trap after executing, so defer them until after the
71
   conditional execution state has been updated.  */
72
#define DISAS_WFI 4
73
#define DISAS_SWI 5
74

    
75
/* XXX: move that elsewhere */
76
extern FILE *logfile;
77
extern int loglevel;
78

    
79
static TCGv cpu_env;
80
/* FIXME:  These should be removed.  */
81
static TCGv cpu_T[3];
82

    
83
/* initialize TCG globals.  */
84
void arm_translate_init(void)
85
{
86
    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
87

    
88
    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
89
    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
90
    cpu_T[2] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG3, "T2");
91
}
92

    
93
/* The code generator doesn't like lots of temporaries, so maintain our own
94
   cache for reuse within a function.  */
95
#define MAX_TEMPS 8
96
static int num_temps;
97
static TCGv temps[MAX_TEMPS];
98

    
99
/* Allocate a temporary variable.  */
100
static TCGv new_tmp(void)
101
{
102
    TCGv tmp;
103
    if (num_temps == MAX_TEMPS)
104
        abort();
105

    
106
    if (GET_TCGV(temps[num_temps]))
107
      return temps[num_temps++];
108

    
109
    tmp = tcg_temp_new(TCG_TYPE_I32);
110
    temps[num_temps++] = tmp;
111
    return tmp;
112
}
113

    
114
/* Release a temporary variable.  */
115
static void dead_tmp(TCGv tmp)
{
    int last = --num_temps;
    int pos = last;

    /* Search downwards from the top of the live region for tmp.  */
    while (GET_TCGV(temps[pos]) != GET_TCGV(tmp))
        pos--;

    /* Already in the slot that has just become free: nothing to move.  */
    if (pos == last)
        return;

    /* Slide the entries above tmp down by one and park tmp in the freed
       top slot, where new_tmp() will find it again.  */
    for (; pos < last; pos++)
        temps[pos] = temps[pos + 1];
    temps[last] = tmp;
}
132

    
133
/* Set a variable to the value of a CPU register.  */
134
static void load_reg_var(DisasContext *s, TCGv var, int reg)
{
    uint32_t pc_value;

    if (reg != 15) {
        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
        return;
    }

    /* r15 is emitted as a constant.  Normally, since s->pc was already
       updated, only one more instruction width has to be added:
       2 in Thumb state, 4 otherwise.  */
    pc_value = (long)s->pc + (s->thumb ? 2 : 4);
    tcg_gen_movi_i32(var, pc_value);
}
148

    
149
/* Create a new temporary and set it to the value of a CPU register.  */
150
static inline TCGv load_reg(DisasContext *s, int reg)
{
    TCGv value;

    /* The caller owns the returned temporary and must release it.  */
    value = new_tmp();
    load_reg_var(s, value, reg);
    return value;
}
156

    
157
/* Set a CPU register.  The source must be a temporary and will be
158
   marked as dead.  */
159
static void store_reg(DisasContext *s, int reg, TCGv var)
{
    const int writes_pc = (reg == 15);

    if (writes_pc) {
        /* A write to r15 drops bit 0 and flags the context as a jump.  */
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }

    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));

    /* The source temporary is consumed by the store.  */
    dead_tmp(var);
}
168

    
169

    
170
/* Basic operations.  */
171
#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
172
#define gen_op_movl_T0_T2() tcg_gen_mov_i32(cpu_T[0], cpu_T[2])
173
#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
174
#define gen_op_movl_T1_T2() tcg_gen_mov_i32(cpu_T[1], cpu_T[2])
175
#define gen_op_movl_T2_T0() tcg_gen_mov_i32(cpu_T[2], cpu_T[0])
176
#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
177
#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
178
#define gen_op_movl_T2_im(im) tcg_gen_movi_i32(cpu_T[2], im)
179

    
180
#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
181
#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
182
#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
183
#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
184

    
185
#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
186
#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
187
#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
188
#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
189
#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
190
#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
191
#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
192

    
193
#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
194
#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
195
#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
196
#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
197
#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)
198

    
199
/* Value extensions.  */
200
#define gen_uxtb(var) tcg_gen_andi_i32(var, var, 0xff)
201
#define gen_uxth(var) tcg_gen_andi_i32(var, var, 0xffff)
202
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
203
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
204

    
205
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
206
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
207
#define gen_op_rev_T0() tcg_gen_bswap_i32(cpu_T[0], cpu_T[0])
208

    
209
#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])
210

    
211
#define gen_op_addl_T0_T1_setq() \
212
    gen_helper_add_setq(cpu_T[0], cpu_T[0], cpu_T[1])
213
#define gen_op_addl_T0_T1_saturate() \
214
    gen_helper_add_saturate(cpu_T[0], cpu_T[0], cpu_T[1])
215
#define gen_op_subl_T0_T1_saturate() \
216
    gen_helper_sub_saturate(cpu_T[0], cpu_T[0], cpu_T[1])
217
#define gen_op_addl_T0_T1_usaturate() \
218
    gen_helper_add_usaturate(cpu_T[0], cpu_T[0], cpu_T[1])
219
#define gen_op_subl_T0_T1_usaturate() \
220
    gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1])
221

    
222
/* Copy the most significant bit of T0 to all bits of T1.  */
223
#define gen_op_signbit_T1_T0() tcg_gen_sari_i32(cpu_T[1], cpu_T[0], 31)
224

    
225
/* Dual signed 16x16 multiply.
   On return a holds (low half of a) * (low half of b) and b holds
   (high half of a) * (high half of b), both operands treated as signed
   16-bit values.  Both inputs are overwritten.  */
static void gen_smul_dual(TCGv a, TCGv b)
{
    TCGv tmp1 = new_tmp();
    TCGv tmp2 = new_tmp();

    /* Low halfwords: sign-extend all 16 bits.  An 8-bit extension would
       silently discard bits 8..15 of each operand.  */
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    dead_tmp(tmp2);

    /* High halfwords: an arithmetic shift both extracts and sign-extends.  */
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);

    /* Only now may a be replaced by the low product.  */
    tcg_gen_mov_i32(a, tmp1);
    dead_tmp(tmp1);
}
239

    
240
/* Byteswap each halfword.  */
241
static void gen_rev16(TCGv var)
{
    TCGv odd_bytes;

    odd_bytes = new_tmp();

    /* Bytes 1 and 3 move down into positions 0 and 2.  */
    tcg_gen_shri_i32(odd_bytes, var, 8);
    tcg_gen_andi_i32(odd_bytes, odd_bytes, 0x00ff00ff);

    /* Bytes 0 and 2 move up into positions 1 and 3.  */
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);

    tcg_gen_or_i32(var, var, odd_bytes);
    dead_tmp(odd_bytes);
}
251

    
252
/* Byteswap low halfword and sign extend.  */
253
static void gen_revsh(TCGv var)
{
    TCGv tmp = new_tmp();

    /* tmp = original byte 1, which becomes the low byte of the result.  */
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff);

    /* Original byte 0 moves to bits 8..15; sign-extend from bit 15.
       (An 8-bit extension here would act on the now-zero low byte and
       wipe the value.)  */
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_ext16s_i32(var, var);

    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}
263

    
264
/* Unsigned bitfield extract.  */
265
static void gen_ubfx(TCGv var, int shift, uint32_t mask)
{
    /* Move the field down to bit 0 (no shift op is emitted for shift 0),
       then keep only the bits selected by mask.  */
    if (shift != 0) {
        tcg_gen_shri_i32(var, var, shift);
    }
    tcg_gen_andi_i32(var, var, mask);
}
271

    
272
/* Signed bitfield extract.  */
273
static void gen_sbfx(TCGv var, int shift, int width)
{
    uint32_t top_bit;
    uint32_t field_mask;

    if (shift != 0) {
        tcg_gen_sari_i32(var, var, shift);
    }

    /* If the field reaches bit 31, no further ops are emitted.  */
    if (shift + width >= 32)
        return;

    /* Otherwise isolate the field and sign-extend it with the
       (x ^ signbit) - signbit identity.  */
    top_bit = 1u << (width - 1);
    field_mask = (1u << width) - 1;
    tcg_gen_andi_i32(var, var, field_mask);
    tcg_gen_xori_i32(var, var, top_bit);
    tcg_gen_subi_i32(var, var, top_bit);
}
286

    
287
/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
288
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
{
    const uint32_t keep = ~mask;

    /* Position the new field and trim it to the destination bits.  */
    tcg_gen_shli_i32(val, val, shift);
    tcg_gen_andi_i32(val, val, mask);

    /* Punch a hole in base, then merge the two.  */
    tcg_gen_andi_i32(base, base, keep);
    tcg_gen_or_i32(dest, base, val);
}
295

    
296
static void gen_op_roundqd_T0_T1(void)
297
{
298
    tcg_gen_shri_i32(cpu_T[0], cpu_T[0], 31);
299
    tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1]);
300
}
301

    
302
/* FIXME: Most targets have native widening multiplication.
303
   It would be good to use that instead of a full wide multiply.  */
304
/* Unsigned 32x32->64 multiply.  */
305
static void gen_op_mull_T0_T1(void)
306
{
307
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
308
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
309

    
310
    tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
311
    tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
312
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
313
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
314
    tcg_gen_shri_i64(tmp1, tmp1, 32);
315
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
316
}
317

    
318
/* Signed 32x32->64 multiply.  */
319
static void gen_op_imull_T0_T1(void)
320
{
321
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
322
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
323

    
324
    tcg_gen_ext_i32_i64(tmp1, cpu_T[0]);
325
    tcg_gen_ext_i32_i64(tmp2, cpu_T[1]);
326
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
327
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
328
    tcg_gen_shri_i64(tmp1, tmp1, 32);
329
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
330
}
331

    
332
/* Swap low and high halfwords.  */
333
static void gen_swap_half(TCGv var)
{
    TCGv high;

    high = new_tmp();
    /* Save the upper halfword in the low position, move the lower
       halfword up, then merge.  */
    tcg_gen_shri_i32(high, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, high);
    dead_tmp(high);
}
341

    
342
/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
343
    tmp = (t0 ^ t1) & 0x8000;
344
    t0 &= ~0x8000;
345
    t1 &= ~0x8000;
346
    t0 = (t0 + t1) ^ tmp;
347
 */
348

    
349
static void gen_add16(TCGv t0, TCGv t1)
{
    TCGv bit15;

    bit15 = new_tmp();

    /* Remember what bit 15 of the sum should be without a carry in.  */
    tcg_gen_xor_i32(bit15, t0, t1);
    tcg_gen_andi_i32(bit15, bit15, 0x8000);

    /* With bit 15 cleared in both inputs the low halfword cannot carry
       into the high halfword.  */
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);

    /* Fold the saved bit 15 back in.  */
    tcg_gen_xor_i32(t0, t0, bit15);

    dead_tmp(bit15);
    dead_tmp(t1);
}
361

    
362
#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
363

    
364
/* Set CF to the top bit of var.  */
365
static void gen_set_CF_bit31(TCGv var)
{
    TCGv tmp = new_tmp();

    /* Extract bit 31 into a scratch temp so var itself is left intact,
       and store that single bit (not the whole of var) into CF.  */
    tcg_gen_shri_i32(tmp, var, 31);
    gen_set_CF(tmp);
    dead_tmp(tmp);
}
372

    
373
/* Set N and Z flags from var.  */
374
static inline void gen_logic_CC(TCGv var)
{
    /* The value is stored as-is into the NZF field of the CPU state.  */
    const long nzf_offset = offsetof(CPUState, NZF);

    tcg_gen_st_i32(var, cpu_env, nzf_offset);
}
378

    
379
/* T0 += T1 + CF.  */
380
static void gen_adc_T0_T1(void)
381
{
382
    TCGv tmp = new_tmp();
383
    gen_op_addl_T0_T1();
384
    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUState, CF));
385
    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
386
    dead_tmp(tmp);
387
}
388

    
389
/* dest = T0 - T1 + CF - 1.  */
390
static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv carry;

    carry = new_tmp();
    /* The subtraction comes first so dest may alias t0 or t1.  */
    tcg_gen_sub_i32(dest, t0, t1);
    /* Then add the carry flag and subtract one.  */
    tcg_gen_ld_i32(carry, cpu_env, offsetof(CPUState, CF));
    tcg_gen_add_i32(dest, dest, carry);
    tcg_gen_subi_i32(dest, dest, 1);
    dead_tmp(carry);
}
399

    
400
#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
401
#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])
402

    
403
/* FIXME:  Implement this natively.  */
404
static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
{
    /* Bitwise NOT expressed as XOR with all ones.  */
    const int all_ones = ~0;

    tcg_gen_xori_i32(t0, t1, all_ones);
}
408

    
409
/* T0 &= ~T1.  Clobbers T1.  */
410
/* FIXME: Implement bic natively.  */
411
static inline void gen_op_bicl_T0_T1(void)
412
{
413
    gen_op_notl_T1();
414
    gen_op_andl_T0_T1();
415
}
416

    
417
/* FIXME:  Implement this natively.  */
418
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
419
{
420
    TCGv tmp;
421

    
422
    if (i == 0)
423
        return;
424

    
425
    tmp = new_tmp();
426
    tcg_gen_shri_i32(tmp, t1, i);
427
    tcg_gen_shli_i32(t1, t1, 32 - i);
428
    tcg_gen_or_i32(t0, t1, tmp);
429
    dead_tmp(tmp);
430
}
431

    
432
/* Set CF to bit number `shift` of var.  var is not modified.  */
static void shifter_out_im(TCGv var, int shift)
{
    TCGv tmp = new_tmp();

    if (shift == 0) {
        tcg_gen_andi_i32(tmp, var, 1);
    } else {
        tcg_gen_shri_i32(tmp, var, shift);
        /* After a logical shift by 31 only bit 0 can be set, so the
           mask is needed for smaller shifts only.  */
        if (shift != 31)
            tcg_gen_andi_i32(tmp, tmp, 1);
    }
    gen_set_CF(tmp);
    dead_tmp(tmp);
}
445

    
446
/* Shift by immediate.  Includes special handling for shift == 0.  */
447
static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
{
    /* var is shifted in place.  shiftop selects LSL/LSR/ASR/ROR (0..3),
       shift is the immediate amount, and a nonzero flags also stores the
       shifter carry-out into CF.  */
    switch (shiftop) {
    case 0: /* LSL */
        /* LSL #0 changes neither the value nor the carry.  */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            /* An amount of 0 is treated as LSR #32: carry-out is bit 31
               and the result is zero.  */
            if (flags) {
                tcg_gen_shri_i32(var, var, 31);
                gen_set_CF(var);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        /* An amount of 0 is treated as ASR #32.  */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        /* ASR #32 gives the same value as ASR #31.  */
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rori_i32(var, var, shift);
        } else {
            /* RRX: shift right by one, old CF enters at bit 31.  The old
               CF is read before shifter_out_im() can overwrite it.  */
            TCGv tmp = new_tmp();
            tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUState, CF));
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_shli_i32(tmp, tmp, 31);
            tcg_gen_or_i32(var, var, tmp);
            dead_tmp(tmp);
        }
        break;
    }
}
496

    
497
#define PAS_OP(pfx) \
498
    switch (op2) {  \
499
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
500
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
501
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
502
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
503
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
504
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
505
    }
506
void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
507
{
508
    TCGv tmp;
509

    
510
    switch (op1) {
511
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
512
    case 1:
513
        tmp = tcg_temp_new(TCG_TYPE_PTR);
514
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
515
        PAS_OP(s)
516
        break;
517
    case 5:
518
        tmp = tcg_temp_new(TCG_TYPE_PTR);
519
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
520
        PAS_OP(u)
521
        break;
522
#undef gen_pas_helper
523
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
524
    case 2:
525
        PAS_OP(q);
526
        break;
527
    case 3:
528
        PAS_OP(sh);
529
        break;
530
    case 6:
531
        PAS_OP(uq);
532
        break;
533
    case 7:
534
        PAS_OP(uh);
535
        break;
536
#undef gen_pas_helper
537
    }
538
}
539
#undef PAS_OP
540

    
541
/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
542
#define PAS_OP(pfx) \
543
    switch (op2) {  \
544
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
545
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
546
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
547
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
548
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
549
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
550
    }
551
void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
552
{
553
    TCGv tmp;
554

    
555
    switch (op1) {
556
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
557
    case 0:
558
        tmp = tcg_temp_new(TCG_TYPE_PTR);
559
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
560
        PAS_OP(s)
561
        break;
562
    case 4:
563
        tmp = tcg_temp_new(TCG_TYPE_PTR);
564
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
565
        PAS_OP(u)
566
        break;
567
#undef gen_pas_helper
568
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
569
    case 1:
570
        PAS_OP(q);
571
        break;
572
    case 2:
573
        PAS_OP(sh);
574
        break;
575
    case 5:
576
        PAS_OP(uq);
577
        break;
578
    case 6:
579
        PAS_OP(uh);
580
        break;
581
#undef gen_pas_helper
582
    }
583
}
584
#undef PAS_OP
585

    
586
static GenOpFunc1 *gen_test_cc[14] = {
587
    gen_op_test_eq,
588
    gen_op_test_ne,
589
    gen_op_test_cs,
590
    gen_op_test_cc,
591
    gen_op_test_mi,
592
    gen_op_test_pl,
593
    gen_op_test_vs,
594
    gen_op_test_vc,
595
    gen_op_test_hi,
596
    gen_op_test_ls,
597
    gen_op_test_ge,
598
    gen_op_test_lt,
599
    gen_op_test_gt,
600
    gen_op_test_le,
601
};
602

    
603
/* One entry per data-processing opcode, in opcode order; 1 marks the
   logical operations (presumably those whose flags come from the logic
   result rather than from an arithmetic helper -- confirm at the users).  */
const uint8_t table_logic_cc[16] = {
    /* and  xor  sub  rsb */
       1,   1,   0,   0,
    /* add  adc  sbc  rsc */
       0,   0,   0,   0,
    /* tst  teq  cmp  cmn */
       1,   1,   0,   0,
    /* orr  mov  bic  mvn */
       1,   1,   1,   1,
};
621

    
622
static GenOpFunc *gen_shift_T1_T0[4] = {
623
    gen_op_shll_T1_T0,
624
    gen_op_shrl_T1_T0,
625
    gen_op_sarl_T1_T0,
626
    gen_op_rorl_T1_T0,
627
};
628

    
629
static GenOpFunc *gen_shift_T1_T0_cc[4] = {
630
    gen_op_shll_T1_T0_cc,
631
    gen_op_shrl_T1_T0_cc,
632
    gen_op_sarl_T1_T0_cc,
633
    gen_op_rorl_T1_T0_cc,
634
};
635

    
636
/* Set PC and thumb state from T0.  Clobbers T0.  */
637
static inline void gen_bx(DisasContext *s)
{
    TCGv thumb_bit;

    s->is_jmp = DISAS_UPDATE;

    /* Bit 0 of T0 goes into the thumb field of the CPU state...  */
    thumb_bit = new_tmp();
    tcg_gen_andi_i32(thumb_bit, cpu_T[0], 1);
    tcg_gen_st_i32(thumb_bit, cpu_env, offsetof(CPUState, thumb));
    dead_tmp(thumb_bit);

    /* ...and T0 with bit 0 cleared becomes the new r15.  */
    tcg_gen_andi_i32(cpu_T[0], cpu_T[0], ~1);
    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
}
649

    
650
#if defined(CONFIG_USER_ONLY)
651
#define gen_ldst(name, s) gen_op_##name##_raw()
652
#else
653
#define gen_ldst(name, s) do { \
654
    s->is_mem = 1; \
655
    if (IS_USER(s)) \
656
        gen_op_##name##_user(); \
657
    else \
658
        gen_op_##name##_kernel(); \
659
    } while (0)
660
#endif
661

    
662
/* T0 = CPU register reg (r15 reads as the adjusted PC constant).  */
static inline void gen_movl_T0_reg(DisasContext *s, int reg)
{
    TCGv dst = cpu_T[0];

    load_reg_var(s, dst, reg);
}

/* T1 = CPU register reg.  */
static inline void gen_movl_T1_reg(DisasContext *s, int reg)
{
    TCGv dst = cpu_T[1];

    load_reg_var(s, dst, reg);
}

/* T2 = CPU register reg.  */
static inline void gen_movl_T2_reg(DisasContext *s, int reg)
{
    TCGv dst = cpu_T[2];

    load_reg_var(s, dst, reg);
}
676

    
677
static inline void gen_set_pc_T0(void)
678
{
679
    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
680
}
681

    
682
/* Store cpu_T[t] into CPU register reg.  cpu_T[t] itself is not changed.  */
static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
{
    TCGv masked;

    if (reg != 15) {
        tcg_gen_st_i32(cpu_T[t], cpu_env, offsetof(CPUState, regs[reg]));
        return;
    }

    /* r15: store a copy with bit 0 cleared and flag the jump.  */
    masked = new_tmp();
    tcg_gen_andi_i32(masked, cpu_T[t], ~1);
    tcg_gen_st_i32(masked, cpu_env, offsetof(CPUState, regs[reg]));
    dead_tmp(masked);
    s->is_jmp = DISAS_JUMP;
}
697

    
698
/* CPU register reg = T0.  */
static inline void gen_movl_reg_T0(DisasContext *s, int reg)
{
    const int t_index = 0;

    gen_movl_reg_TN(s, reg, t_index);
}

/* CPU register reg = T1.  */
static inline void gen_movl_reg_T1(DisasContext *s, int reg)
{
    const int t_index = 1;

    gen_movl_reg_TN(s, reg, t_index);
}
707

    
708
/* Force a TB lookup after an instruction that changes the CPU state.  */
709
static inline void gen_lookup_tb(DisasContext *s)
{
    /* Write the current s->pc to r15 by way of T0 (T0 is clobbered).  */
    tcg_gen_movi_i32(cpu_T[0], s->pc);
    gen_movl_reg_TN(s, 15, 0);

    /* The r15 store set is_jmp to DISAS_JUMP; override it.  */
    s->is_jmp = DISAS_UPDATE;
}
715

    
716
/* Apply the addressing offset encoded in insn to T1: a 12-bit immediate
   when bit 25 is clear, otherwise a register shifted by an immediate.
   Bit 23 selects add (set) or subtract (clear).  */
static inline void gen_add_data_offset(DisasContext *s, unsigned int insn)
{
    const int add = (insn & (1 << 23)) != 0;
    int imm, rm, amount, type;
    TCGv offset;

    if (insn & (1 << 25)) {
        /* shift/register */
        rm = insn & 0xf;
        amount = (insn >> 7) & 0x1f;
        type = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, type, amount, 0);
        if (add)
            tcg_gen_add_i32(cpu_T[1], cpu_T[1], offset);
        else
            tcg_gen_sub_i32(cpu_T[1], cpu_T[1], offset);
        dead_tmp(offset);
        return;
    }

    /* immediate */
    imm = insn & 0xfff;
    if (!add)
        imm = -imm;
    if (imm != 0)
        gen_op_addl_T1_im(imm);
}
742

    
743
/* Apply the addressing offset encoded in insn to T1: an 8-bit immediate
   split over bits 0..3 and 8..11 when bit 22 is set, otherwise a plain
   register.  Bit 23 selects add (set) or subtract (clear); extra is
   always added on top.  */
static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra)
{
    const int add = (insn & (1 << 23)) != 0;
    int imm, rm;
    TCGv offset;

    if (!(insn & (1 << 22))) {
        /* register */
        if (extra)
            gen_op_addl_T1_im(extra);
        rm = insn & 0xf;
        offset = load_reg(s, rm);
        if (add)
            tcg_gen_add_i32(cpu_T[1], cpu_T[1], offset);
        else
            tcg_gen_sub_i32(cpu_T[1], cpu_T[1], offset);
        dead_tmp(offset);
        return;
    }

    /* immediate */
    imm = (insn & 0xf) | ((insn >> 4) & 0xf0);
    if (!add)
        imm = -imm;
    imm += extra;
    if (imm != 0)
        gen_op_addl_T1_im(imm);
}
770

    
771
#define VFP_OP(name)                      \
772
static inline void gen_vfp_##name(int dp) \
773
{                                         \
774
    if (dp)                               \
775
        gen_op_vfp_##name##d();           \
776
    else                                  \
777
        gen_op_vfp_##name##s();           \
778
}
779

    
780
#define VFP_OP1(name)                               \
781
static inline void gen_vfp_##name(int dp, int arg)  \
782
{                                                   \
783
    if (dp)                                         \
784
        gen_op_vfp_##name##d(arg);                  \
785
    else                                            \
786
        gen_op_vfp_##name##s(arg);                  \
787
}
788

    
789
VFP_OP(add)
790
VFP_OP(sub)
791
VFP_OP(mul)
792
VFP_OP(div)
793
VFP_OP(neg)
794
VFP_OP(abs)
795
VFP_OP(sqrt)
796
VFP_OP(cmp)
797
VFP_OP(cmpe)
798
VFP_OP(F1_ld0)
799
VFP_OP(uito)
800
VFP_OP(sito)
801
VFP_OP(toui)
802
VFP_OP(touiz)
803
VFP_OP(tosi)
804
VFP_OP(tosiz)
805
VFP_OP1(tosh)
806
VFP_OP1(tosl)
807
VFP_OP1(touh)
808
VFP_OP1(toul)
809
VFP_OP1(shto)
810
VFP_OP1(slto)
811
VFP_OP1(uhto)
812
VFP_OP1(ulto)
813

    
814
#undef VFP_OP
815

    
816
/* Emit the constant-load op for val: the "d" variant when dp is nonzero,
   the "s" variant otherwise.  */
static inline void gen_vfp_fconst(int dp, uint32_t val)
{
    if (!dp) {
        gen_op_vfp_fconsts(val);
        return;
    }
    gen_op_vfp_fconstd(val);
}
823

    
824
/* Emit a VFP load through gen_ldst: vfp_ldd when dp is nonzero,
   vfp_lds otherwise.  */
static inline void gen_vfp_ld(DisasContext *s, int dp)
{
    if (dp) {
        gen_ldst(vfp_ldd, s);
    } else {
        gen_ldst(vfp_lds, s);
    }
}

/* Emit a VFP store through gen_ldst: vfp_std when dp is nonzero,
   vfp_sts otherwise.  */
static inline void gen_vfp_st(DisasContext *s, int dp)
{
    if (dp) {
        gen_ldst(vfp_std, s);
    } else {
        gen_ldst(vfp_sts, s);
    }
}
839

    
840
static inline long
841
vfp_reg_offset (int dp, int reg)
842
{
843
    if (dp)
844
        return offsetof(CPUARMState, vfp.regs[reg]);
845
    else if (reg & 1) {
846
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
847
          + offsetof(CPU_DoubleU, l.upper);
848
    } else {
849
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
850
          + offsetof(CPU_DoubleU, l.lower);
851
    }
852
}
853

    
854
/* Return the offset of a 32-bit piece of a NEON register.
855
   zero is the least significant end of the register.  */
856
static inline long
neon_reg_offset (int reg, int n)
{
    /* Piece n of NEON register reg is looked up as non-dp VFP register
       number 2 * reg + n.  */
    return vfp_reg_offset(0, reg * 2 + n);
}
863

    
864
#define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n))
865
#define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n))
866

    
867
/* F0 = VFP register reg (the "d" op when dp is nonzero, else the "s" op).  */
static inline void gen_mov_F0_vreg(int dp, int reg)
{
    const long off = vfp_reg_offset(dp, reg);

    if (dp)
        gen_op_vfp_getreg_F0d(off);
    else
        gen_op_vfp_getreg_F0s(off);
}

/* F1 = VFP register reg.  */
static inline void gen_mov_F1_vreg(int dp, int reg)
{
    const long off = vfp_reg_offset(dp, reg);

    if (dp)
        gen_op_vfp_getreg_F1d(off);
    else
        gen_op_vfp_getreg_F1s(off);
}

/* VFP register reg = F0.  */
static inline void gen_mov_vreg_F0(int dp, int reg)
{
    const long off = vfp_reg_offset(dp, reg);

    if (dp)
        gen_op_vfp_setreg_F0d(off);
    else
        gen_op_vfp_setreg_F0s(off);
}
890

    
891
#define ARM_CP_RW_BIT        (1 << 20)
892

    
893
/* Load the base register (bits 16..19 of insn) into T1 and apply the
   encoded addressing mode.  Bit 24 = pre-indexed, bit 23 = add,
   bit 21 = write back.  Returns 1 when bits 24, 21 and 23 are all
   clear, 0 otherwise.  The post-indexed path clobbers T0.  */
static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
{
    const int pre = (insn & (1 << 24)) != 0;
    const int up = (insn & (1 << 23)) != 0;
    const int writeback = (insn & (1 << 21)) != 0;
    int rd;
    uint32_t offset;
    uint32_t delta;

    rd = (insn >> 16) & 0xf;
    gen_movl_T1_reg(s, rd);

    /* 8-bit immediate, shifted left by 2 when bit 8 of insn is set.  */
    offset = (insn & 0xff) << ((insn >> 7) & 2);
    delta = up ? offset : -offset;

    if (pre) {
        /* Pre indexed */
        gen_op_addl_T1_im(delta);
        if (writeback)
            gen_movl_reg_T1(s, rd);
        return 0;
    }

    if (writeback) {
        /* Post indexed */
        gen_op_movl_T0_im(delta);
        gen_op_addl_T0_T1();
        gen_movl_reg_T0(s, rd);
        return 0;
    }

    return up ? 0 : 1;
}
923

    
924
static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
925
{
926
    int rd = (insn >> 0) & 0xf;
927

    
928
    if (insn & (1 << 8))
929
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
930
            return 1;
931
        else
932
            gen_op_iwmmxt_movl_T0_wCx(rd);
933
    else
934
        gen_op_iwmmxt_movl_T0_T1_wRn(rd);
935

    
936
    gen_op_movl_T1_im(mask);
937
    gen_op_andl_T0_T1();
938
    return 0;
939
}
940

    
941
/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
942
   (ie. an undefined instruction).  */
943
static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
944
{
945
    int rd, wrd;
946
    int rdhi, rdlo, rd0, rd1, i;
947

    
948
    if ((insn & 0x0e000e00) == 0x0c000000) {
949
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
950
            wrd = insn & 0xf;
951
            rdlo = (insn >> 12) & 0xf;
952
            rdhi = (insn >> 16) & 0xf;
953
            if (insn & ARM_CP_RW_BIT) {                        /* TMRRC */
954
                gen_op_iwmmxt_movl_T0_T1_wRn(wrd);
955
                gen_movl_reg_T0(s, rdlo);
956
                gen_movl_reg_T1(s, rdhi);
957
            } else {                                        /* TMCRR */
958
                gen_movl_T0_reg(s, rdlo);
959
                gen_movl_T1_reg(s, rdhi);
960
                gen_op_iwmmxt_movl_wRn_T0_T1(wrd);
961
                gen_op_iwmmxt_set_mup();
962
            }
963
            return 0;
964
        }
965

    
966
        wrd = (insn >> 12) & 0xf;
967
        if (gen_iwmmxt_address(s, insn))
968
            return 1;
969
        if (insn & ARM_CP_RW_BIT) {
970
            if ((insn >> 28) == 0xf) {                        /* WLDRW wCx */
971
                gen_ldst(ldl, s);
972
                gen_op_iwmmxt_movl_wCx_T0(wrd);
973
            } else {
974
                if (insn & (1 << 8))
975
                    if (insn & (1 << 22))                /* WLDRD */
976
                        gen_ldst(iwmmxt_ldq, s);
977
                    else                                /* WLDRW wRd */
978
                        gen_ldst(iwmmxt_ldl, s);
979
                else
980
                    if (insn & (1 << 22))                /* WLDRH */
981
                        gen_ldst(iwmmxt_ldw, s);
982
                    else                                /* WLDRB */
983
                        gen_ldst(iwmmxt_ldb, s);
984
                gen_op_iwmmxt_movq_wRn_M0(wrd);
985
            }
986
        } else {
987
            if ((insn >> 28) == 0xf) {                        /* WSTRW wCx */
988
                gen_op_iwmmxt_movl_T0_wCx(wrd);
989
                gen_ldst(stl, s);
990
            } else {
991
                gen_op_iwmmxt_movq_M0_wRn(wrd);
992
                if (insn & (1 << 8))
993
                    if (insn & (1 << 22))                /* WSTRD */
994
                        gen_ldst(iwmmxt_stq, s);
995
                    else                                /* WSTRW wRd */
996
                        gen_ldst(iwmmxt_stl, s);
997
                else
998
                    if (insn & (1 << 22))                /* WSTRH */
999
                        gen_ldst(iwmmxt_ldw, s);
1000
                    else                                /* WSTRB */
1001
                        gen_ldst(iwmmxt_stb, s);
1002
            }
1003
        }
1004
        return 0;
1005
    }
1006

    
1007
    if ((insn & 0x0f000000) != 0x0e000000)
1008
        return 1;
1009

    
1010
    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1011
    case 0x000:                                                /* WOR */
1012
        wrd = (insn >> 12) & 0xf;
1013
        rd0 = (insn >> 0) & 0xf;
1014
        rd1 = (insn >> 16) & 0xf;
1015
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1016
        gen_op_iwmmxt_orq_M0_wRn(rd1);
1017
        gen_op_iwmmxt_setpsr_nz();
1018
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1019
        gen_op_iwmmxt_set_mup();
1020
        gen_op_iwmmxt_set_cup();
1021
        break;
1022
    case 0x011:                                                /* TMCR */
1023
        if (insn & 0xf)
1024
            return 1;
1025
        rd = (insn >> 12) & 0xf;
1026
        wrd = (insn >> 16) & 0xf;
1027
        switch (wrd) {
1028
        case ARM_IWMMXT_wCID:
1029
        case ARM_IWMMXT_wCASF:
1030
            break;
1031
        case ARM_IWMMXT_wCon:
1032
            gen_op_iwmmxt_set_cup();
1033
            /* Fall through.  */
1034
        case ARM_IWMMXT_wCSSF:
1035
            gen_op_iwmmxt_movl_T0_wCx(wrd);
1036
            gen_movl_T1_reg(s, rd);
1037
            gen_op_bicl_T0_T1();
1038
            gen_op_iwmmxt_movl_wCx_T0(wrd);
1039
            break;
1040
        case ARM_IWMMXT_wCGR0:
1041
        case ARM_IWMMXT_wCGR1:
1042
        case ARM_IWMMXT_wCGR2:
1043
        case ARM_IWMMXT_wCGR3:
1044
            gen_op_iwmmxt_set_cup();
1045
            gen_movl_reg_T0(s, rd);
1046
            gen_op_iwmmxt_movl_wCx_T0(wrd);
1047
            break;
1048
        default:
1049
            return 1;
1050
        }
1051
        break;
1052
    case 0x100:                                                /* WXOR */
1053
        wrd = (insn >> 12) & 0xf;
1054
        rd0 = (insn >> 0) & 0xf;
1055
        rd1 = (insn >> 16) & 0xf;
1056
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1057
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1058
        gen_op_iwmmxt_setpsr_nz();
1059
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1060
        gen_op_iwmmxt_set_mup();
1061
        gen_op_iwmmxt_set_cup();
1062
        break;
1063
    case 0x111:                                                /* TMRC */
1064
        if (insn & 0xf)
1065
            return 1;
1066
        rd = (insn >> 12) & 0xf;
1067
        wrd = (insn >> 16) & 0xf;
1068
        gen_op_iwmmxt_movl_T0_wCx(wrd);
1069
        gen_movl_reg_T0(s, rd);
1070
        break;
1071
    case 0x300:                                                /* WANDN */
1072
        wrd = (insn >> 12) & 0xf;
1073
        rd0 = (insn >> 0) & 0xf;
1074
        rd1 = (insn >> 16) & 0xf;
1075
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1076
        gen_op_iwmmxt_negq_M0();
1077
        gen_op_iwmmxt_andq_M0_wRn(rd1);
1078
        gen_op_iwmmxt_setpsr_nz();
1079
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1080
        gen_op_iwmmxt_set_mup();
1081
        gen_op_iwmmxt_set_cup();
1082
        break;
1083
    case 0x200:                                                /* WAND */
1084
        wrd = (insn >> 12) & 0xf;
1085
        rd0 = (insn >> 0) & 0xf;
1086
        rd1 = (insn >> 16) & 0xf;
1087
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1088
        gen_op_iwmmxt_andq_M0_wRn(rd1);
1089
        gen_op_iwmmxt_setpsr_nz();
1090
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1091
        gen_op_iwmmxt_set_mup();
1092
        gen_op_iwmmxt_set_cup();
1093
        break;
1094
    case 0x810: case 0xa10:                                /* WMADD */
1095
        wrd = (insn >> 12) & 0xf;
1096
        rd0 = (insn >> 0) & 0xf;
1097
        rd1 = (insn >> 16) & 0xf;
1098
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1099
        if (insn & (1 << 21))
1100
            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1101
        else
1102
            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1103
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1104
        gen_op_iwmmxt_set_mup();
1105
        break;
1106
    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:        /* WUNPCKIL */
1107
        wrd = (insn >> 12) & 0xf;
1108
        rd0 = (insn >> 16) & 0xf;
1109
        rd1 = (insn >> 0) & 0xf;
1110
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1111
        switch ((insn >> 22) & 3) {
1112
        case 0:
1113
            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1114
            break;
1115
        case 1:
1116
            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1117
            break;
1118
        case 2:
1119
            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1120
            break;
1121
        case 3:
1122
            return 1;
1123
        }
1124
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1125
        gen_op_iwmmxt_set_mup();
1126
        gen_op_iwmmxt_set_cup();
1127
        break;
1128
    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:        /* WUNPCKIH */
1129
        wrd = (insn >> 12) & 0xf;
1130
        rd0 = (insn >> 16) & 0xf;
1131
        rd1 = (insn >> 0) & 0xf;
1132
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1133
        switch ((insn >> 22) & 3) {
1134
        case 0:
1135
            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1136
            break;
1137
        case 1:
1138
            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1139
            break;
1140
        case 2:
1141
            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1142
            break;
1143
        case 3:
1144
            return 1;
1145
        }
1146
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1147
        gen_op_iwmmxt_set_mup();
1148
        gen_op_iwmmxt_set_cup();
1149
        break;
1150
    case 0x012: case 0x112: case 0x412: case 0x512:        /* WSAD */
1151
        wrd = (insn >> 12) & 0xf;
1152
        rd0 = (insn >> 16) & 0xf;
1153
        rd1 = (insn >> 0) & 0xf;
1154
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1155
        if (insn & (1 << 22))
1156
            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1157
        else
1158
            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1159
        if (!(insn & (1 << 20)))
1160
            gen_op_iwmmxt_addl_M0_wRn(wrd);
1161
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1162
        gen_op_iwmmxt_set_mup();
1163
        break;
1164
    case 0x010: case 0x110: case 0x210: case 0x310:        /* WMUL */
1165
        wrd = (insn >> 12) & 0xf;
1166
        rd0 = (insn >> 16) & 0xf;
1167
        rd1 = (insn >> 0) & 0xf;
1168
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1169
        if (insn & (1 << 21))
1170
            gen_op_iwmmxt_mulsw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
1171
        else
1172
            gen_op_iwmmxt_muluw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
1173
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1174
        gen_op_iwmmxt_set_mup();
1175
        break;
1176
    case 0x410: case 0x510: case 0x610: case 0x710:        /* WMAC */
1177
        wrd = (insn >> 12) & 0xf;
1178
        rd0 = (insn >> 16) & 0xf;
1179
        rd1 = (insn >> 0) & 0xf;
1180
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1181
        if (insn & (1 << 21))
1182
            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1183
        else
1184
            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1185
        if (!(insn & (1 << 20))) {
1186
            if (insn & (1 << 21))
1187
                gen_op_iwmmxt_addsq_M0_wRn(wrd);
1188
            else
1189
                gen_op_iwmmxt_adduq_M0_wRn(wrd);
1190
        }
1191
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1192
        gen_op_iwmmxt_set_mup();
1193
        break;
1194
    case 0x006: case 0x406: case 0x806: case 0xc06:        /* WCMPEQ */
1195
        wrd = (insn >> 12) & 0xf;
1196
        rd0 = (insn >> 16) & 0xf;
1197
        rd1 = (insn >> 0) & 0xf;
1198
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1199
        switch ((insn >> 22) & 3) {
1200
        case 0:
1201
            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1202
            break;
1203
        case 1:
1204
            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1205
            break;
1206
        case 2:
1207
            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1208
            break;
1209
        case 3:
1210
            return 1;
1211
        }
1212
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1213
        gen_op_iwmmxt_set_mup();
1214
        gen_op_iwmmxt_set_cup();
1215
        break;
1216
    case 0x800: case 0x900: case 0xc00: case 0xd00:        /* WAVG2 */
1217
        wrd = (insn >> 12) & 0xf;
1218
        rd0 = (insn >> 16) & 0xf;
1219
        rd1 = (insn >> 0) & 0xf;
1220
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1221
        if (insn & (1 << 22))
1222
            gen_op_iwmmxt_avgw_M0_wRn(rd1, (insn >> 20) & 1);
1223
        else
1224
            gen_op_iwmmxt_avgb_M0_wRn(rd1, (insn >> 20) & 1);
1225
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1226
        gen_op_iwmmxt_set_mup();
1227
        gen_op_iwmmxt_set_cup();
1228
        break;
1229
    case 0x802: case 0x902: case 0xa02: case 0xb02:        /* WALIGNR */
1230
        wrd = (insn >> 12) & 0xf;
1231
        rd0 = (insn >> 16) & 0xf;
1232
        rd1 = (insn >> 0) & 0xf;
1233
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1234
        gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1235
        gen_op_movl_T1_im(7);
1236
        gen_op_andl_T0_T1();
1237
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
1238
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1239
        gen_op_iwmmxt_set_mup();
1240
        break;
1241
    case 0x601: case 0x605: case 0x609: case 0x60d:        /* TINSR */
1242
        rd = (insn >> 12) & 0xf;
1243
        wrd = (insn >> 16) & 0xf;
1244
        gen_movl_T0_reg(s, rd);
1245
        gen_op_iwmmxt_movq_M0_wRn(wrd);
1246
        switch ((insn >> 6) & 3) {
1247
        case 0:
1248
            gen_op_movl_T1_im(0xff);
1249
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
1250
            break;
1251
        case 1:
1252
            gen_op_movl_T1_im(0xffff);
1253
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
1254
            break;
1255
        case 2:
1256
            gen_op_movl_T1_im(0xffffffff);
1257
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
1258
            break;
1259
        case 3:
1260
            return 1;
1261
        }
1262
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1263
        gen_op_iwmmxt_set_mup();
1264
        break;
1265
    case 0x107: case 0x507: case 0x907: case 0xd07:        /* TEXTRM */
1266
        rd = (insn >> 12) & 0xf;
1267
        wrd = (insn >> 16) & 0xf;
1268
        if (rd == 15)
1269
            return 1;
1270
        gen_op_iwmmxt_movq_M0_wRn(wrd);
1271
        switch ((insn >> 22) & 3) {
1272
        case 0:
1273
            if (insn & 8)
1274
                gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
1275
            else {
1276
                gen_op_movl_T1_im(0xff);
1277
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 7) << 3);
1278
            }
1279
            break;
1280
        case 1:
1281
            if (insn & 8)
1282
                gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
1283
            else {
1284
                gen_op_movl_T1_im(0xffff);
1285
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 3) << 4);
1286
            }
1287
            break;
1288
        case 2:
1289
            gen_op_movl_T1_im(0xffffffff);
1290
            gen_op_iwmmxt_extru_T0_M0_T1((insn & 1) << 5);
1291
            break;
1292
        case 3:
1293
            return 1;
1294
        }
1295
        gen_movl_reg_T0(s, rd);
1296
        break;
1297
    case 0x117: case 0x517: case 0x917: case 0xd17:        /* TEXTRC */
1298
        if ((insn & 0x000ff008) != 0x0003f000)
1299
            return 1;
1300
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1301
        switch ((insn >> 22) & 3) {
1302
        case 0:
1303
            gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
1304
            break;
1305
        case 1:
1306
            gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
1307
            break;
1308
        case 2:
1309
            gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
1310
            break;
1311
        case 3:
1312
            return 1;
1313
        }
1314
        gen_op_shll_T1_im(28);
1315
        gen_op_movl_T0_T1();
1316
        gen_op_movl_cpsr_T0(0xf0000000);
1317
        break;
1318
    case 0x401: case 0x405: case 0x409: case 0x40d:        /* TBCST */
1319
        rd = (insn >> 12) & 0xf;
1320
        wrd = (insn >> 16) & 0xf;
1321
        gen_movl_T0_reg(s, rd);
1322
        switch ((insn >> 6) & 3) {
1323
        case 0:
1324
            gen_op_iwmmxt_bcstb_M0_T0();
1325
            break;
1326
        case 1:
1327
            gen_op_iwmmxt_bcstw_M0_T0();
1328
            break;
1329
        case 2:
1330
            gen_op_iwmmxt_bcstl_M0_T0();
1331
            break;
1332
        case 3:
1333
            return 1;
1334
        }
1335
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1336
        gen_op_iwmmxt_set_mup();
1337
        break;
1338
    case 0x113: case 0x513: case 0x913: case 0xd13:        /* TANDC */
1339
        if ((insn & 0x000ff00f) != 0x0003f000)
1340
            return 1;
1341
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1342
        switch ((insn >> 22) & 3) {
1343
        case 0:
1344
            for (i = 0; i < 7; i ++) {
1345
                gen_op_shll_T1_im(4);
1346
                gen_op_andl_T0_T1();
1347
            }
1348
            break;
1349
        case 1:
1350
            for (i = 0; i < 3; i ++) {
1351
                gen_op_shll_T1_im(8);
1352
                gen_op_andl_T0_T1();
1353
            }
1354
            break;
1355
        case 2:
1356
            gen_op_shll_T1_im(16);
1357
            gen_op_andl_T0_T1();
1358
            break;
1359
        case 3:
1360
            return 1;
1361
        }
1362
        gen_op_movl_cpsr_T0(0xf0000000);
1363
        break;
1364
    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:        /* WACC */
1365
        wrd = (insn >> 12) & 0xf;
1366
        rd0 = (insn >> 16) & 0xf;
1367
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1368
        switch ((insn >> 22) & 3) {
1369
        case 0:
1370
            gen_op_iwmmxt_addcb_M0();
1371
            break;
1372
        case 1:
1373
            gen_op_iwmmxt_addcw_M0();
1374
            break;
1375
        case 2:
1376
            gen_op_iwmmxt_addcl_M0();
1377
            break;
1378
        case 3:
1379
            return 1;
1380
        }
1381
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1382
        gen_op_iwmmxt_set_mup();
1383
        break;
1384
    case 0x115: case 0x515: case 0x915: case 0xd15:        /* TORC */
1385
        if ((insn & 0x000ff00f) != 0x0003f000)
1386
            return 1;
1387
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1388
        switch ((insn >> 22) & 3) {
1389
        case 0:
1390
            for (i = 0; i < 7; i ++) {
1391
                gen_op_shll_T1_im(4);
1392
                gen_op_orl_T0_T1();
1393
            }
1394
            break;
1395
        case 1:
1396
            for (i = 0; i < 3; i ++) {
1397
                gen_op_shll_T1_im(8);
1398
                gen_op_orl_T0_T1();
1399
            }
1400
            break;
1401
        case 2:
1402
            gen_op_shll_T1_im(16);
1403
            gen_op_orl_T0_T1();
1404
            break;
1405
        case 3:
1406
            return 1;
1407
        }
1408
        gen_op_movl_T1_im(0xf0000000);
1409
        gen_op_andl_T0_T1();
1410
        gen_op_movl_cpsr_T0(0xf0000000);
1411
        break;
1412
    case 0x103: case 0x503: case 0x903: case 0xd03:        /* TMOVMSK */
1413
        rd = (insn >> 12) & 0xf;
1414
        rd0 = (insn >> 16) & 0xf;
1415
        if ((insn & 0xf) != 0)
1416
            return 1;
1417
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1418
        switch ((insn >> 22) & 3) {
1419
        case 0:
1420
            gen_op_iwmmxt_msbb_T0_M0();
1421
            break;
1422
        case 1:
1423
            gen_op_iwmmxt_msbw_T0_M0();
1424
            break;
1425
        case 2:
1426
            gen_op_iwmmxt_msbl_T0_M0();
1427
            break;
1428
        case 3:
1429
            return 1;
1430
        }
1431
        gen_movl_reg_T0(s, rd);
1432
        break;
1433
    case 0x106: case 0x306: case 0x506: case 0x706:        /* WCMPGT */
1434
    case 0x906: case 0xb06: case 0xd06: case 0xf06:
1435
        wrd = (insn >> 12) & 0xf;
1436
        rd0 = (insn >> 16) & 0xf;
1437
        rd1 = (insn >> 0) & 0xf;
1438
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1439
        switch ((insn >> 22) & 3) {
1440
        case 0:
1441
            if (insn & (1 << 21))
1442
                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1443
            else
1444
                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1445
            break;
1446
        case 1:
1447
            if (insn & (1 << 21))
1448
                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1449
            else
1450
                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1451
            break;
1452
        case 2:
1453
            if (insn & (1 << 21))
1454
                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1455
            else
1456
                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1457
            break;
1458
        case 3:
1459
            return 1;
1460
        }
1461
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1462
        gen_op_iwmmxt_set_mup();
1463
        gen_op_iwmmxt_set_cup();
1464
        break;
1465
    case 0x00e: case 0x20e: case 0x40e: case 0x60e:        /* WUNPCKEL */
1466
    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1467
        wrd = (insn >> 12) & 0xf;
1468
        rd0 = (insn >> 16) & 0xf;
1469
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1470
        switch ((insn >> 22) & 3) {
1471
        case 0:
1472
            if (insn & (1 << 21))
1473
                gen_op_iwmmxt_unpacklsb_M0();
1474
            else
1475
                gen_op_iwmmxt_unpacklub_M0();
1476
            break;
1477
        case 1:
1478
            if (insn & (1 << 21))
1479
                gen_op_iwmmxt_unpacklsw_M0();
1480
            else
1481
                gen_op_iwmmxt_unpackluw_M0();
1482
            break;
1483
        case 2:
1484
            if (insn & (1 << 21))
1485
                gen_op_iwmmxt_unpacklsl_M0();
1486
            else
1487
                gen_op_iwmmxt_unpacklul_M0();
1488
            break;
1489
        case 3:
1490
            return 1;
1491
        }
1492
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1493
        gen_op_iwmmxt_set_mup();
1494
        gen_op_iwmmxt_set_cup();
1495
        break;
1496
    case 0x00c: case 0x20c: case 0x40c: case 0x60c:        /* WUNPCKEH */
1497
    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
1498
        wrd = (insn >> 12) & 0xf;
1499
        rd0 = (insn >> 16) & 0xf;
1500
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1501
        switch ((insn >> 22) & 3) {
1502
        case 0:
1503
            if (insn & (1 << 21))
1504
                gen_op_iwmmxt_unpackhsb_M0();
1505
            else
1506
                gen_op_iwmmxt_unpackhub_M0();
1507
            break;
1508
        case 1:
1509
            if (insn & (1 << 21))
1510
                gen_op_iwmmxt_unpackhsw_M0();
1511
            else
1512
                gen_op_iwmmxt_unpackhuw_M0();
1513
            break;
1514
        case 2:
1515
            if (insn & (1 << 21))
1516
                gen_op_iwmmxt_unpackhsl_M0();
1517
            else
1518
                gen_op_iwmmxt_unpackhul_M0();
1519
            break;
1520
        case 3:
1521
            return 1;
1522
        }
1523
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1524
        gen_op_iwmmxt_set_mup();
1525
        gen_op_iwmmxt_set_cup();
1526
        break;
1527
    case 0x204: case 0x604: case 0xa04: case 0xe04:        /* WSRL */
1528
    case 0x214: case 0x614: case 0xa14: case 0xe14:
1529
        wrd = (insn >> 12) & 0xf;
1530
        rd0 = (insn >> 16) & 0xf;
1531
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1532
        if (gen_iwmmxt_shift(insn, 0xff))
1533
            return 1;
1534
        switch ((insn >> 22) & 3) {
1535
        case 0:
1536
            return 1;
1537
        case 1:
1538
            gen_op_iwmmxt_srlw_M0_T0();
1539
            break;
1540
        case 2:
1541
            gen_op_iwmmxt_srll_M0_T0();
1542
            break;
1543
        case 3:
1544
            gen_op_iwmmxt_srlq_M0_T0();
1545
            break;
1546
        }
1547
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1548
        gen_op_iwmmxt_set_mup();
1549
        gen_op_iwmmxt_set_cup();
1550
        break;
1551
    case 0x004: case 0x404: case 0x804: case 0xc04:        /* WSRA */
1552
    case 0x014: case 0x414: case 0x814: case 0xc14:
1553
        wrd = (insn >> 12) & 0xf;
1554
        rd0 = (insn >> 16) & 0xf;
1555
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1556
        if (gen_iwmmxt_shift(insn, 0xff))
1557
            return 1;
1558
        switch ((insn >> 22) & 3) {
1559
        case 0:
1560
            return 1;
1561
        case 1:
1562
            gen_op_iwmmxt_sraw_M0_T0();
1563
            break;
1564
        case 2:
1565
            gen_op_iwmmxt_sral_M0_T0();
1566
            break;
1567
        case 3:
1568
            gen_op_iwmmxt_sraq_M0_T0();
1569
            break;
1570
        }
1571
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1572
        gen_op_iwmmxt_set_mup();
1573
        gen_op_iwmmxt_set_cup();
1574
        break;
1575
    case 0x104: case 0x504: case 0x904: case 0xd04:        /* WSLL */
1576
    case 0x114: case 0x514: case 0x914: case 0xd14:
1577
        wrd = (insn >> 12) & 0xf;
1578
        rd0 = (insn >> 16) & 0xf;
1579
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1580
        if (gen_iwmmxt_shift(insn, 0xff))
1581
            return 1;
1582
        switch ((insn >> 22) & 3) {
1583
        case 0:
1584
            return 1;
1585
        case 1:
1586
            gen_op_iwmmxt_sllw_M0_T0();
1587
            break;
1588
        case 2:
1589
            gen_op_iwmmxt_slll_M0_T0();
1590
            break;
1591
        case 3:
1592
            gen_op_iwmmxt_sllq_M0_T0();
1593
            break;
1594
        }
1595
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1596
        gen_op_iwmmxt_set_mup();
1597
        gen_op_iwmmxt_set_cup();
1598
        break;
1599
    case 0x304: case 0x704: case 0xb04: case 0xf04:        /* WROR */
1600
    case 0x314: case 0x714: case 0xb14: case 0xf14:
1601
        wrd = (insn >> 12) & 0xf;
1602
        rd0 = (insn >> 16) & 0xf;
1603
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1604
        switch ((insn >> 22) & 3) {
1605
        case 0:
1606
            return 1;
1607
        case 1:
1608
            if (gen_iwmmxt_shift(insn, 0xf))
1609
                return 1;
1610
            gen_op_iwmmxt_rorw_M0_T0();
1611
            break;
1612
        case 2:
1613
            if (gen_iwmmxt_shift(insn, 0x1f))
1614
                return 1;
1615
            gen_op_iwmmxt_rorl_M0_T0();
1616
            break;
1617
        case 3:
1618
            if (gen_iwmmxt_shift(insn, 0x3f))
1619
                return 1;
1620
            gen_op_iwmmxt_rorq_M0_T0();
1621
            break;
1622
        }
1623
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1624
        gen_op_iwmmxt_set_mup();
1625
        gen_op_iwmmxt_set_cup();
1626
        break;
1627
    case 0x116: case 0x316: case 0x516: case 0x716:        /* WMIN */
1628
    case 0x916: case 0xb16: case 0xd16: case 0xf16:
1629
        wrd = (insn >> 12) & 0xf;
1630
        rd0 = (insn >> 16) & 0xf;
1631
        rd1 = (insn >> 0) & 0xf;
1632
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1633
        switch ((insn >> 22) & 3) {
1634
        case 0:
1635
            if (insn & (1 << 21))
1636
                gen_op_iwmmxt_minsb_M0_wRn(rd1);
1637
            else
1638
                gen_op_iwmmxt_minub_M0_wRn(rd1);
1639
            break;
1640
        case 1:
1641
            if (insn & (1 << 21))
1642
                gen_op_iwmmxt_minsw_M0_wRn(rd1);
1643
            else
1644
                gen_op_iwmmxt_minuw_M0_wRn(rd1);
1645
            break;
1646
        case 2:
1647
            if (insn & (1 << 21))
1648
                gen_op_iwmmxt_minsl_M0_wRn(rd1);
1649
            else
1650
                gen_op_iwmmxt_minul_M0_wRn(rd1);
1651
            break;
1652
        case 3:
1653
            return 1;
1654
        }
1655
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1656
        gen_op_iwmmxt_set_mup();
1657
        break;
1658
    case 0x016: case 0x216: case 0x416: case 0x616:        /* WMAX */
1659
    case 0x816: case 0xa16: case 0xc16: case 0xe16:
1660
        wrd = (insn >> 12) & 0xf;
1661
        rd0 = (insn >> 16) & 0xf;
1662
        rd1 = (insn >> 0) & 0xf;
1663
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1664
        switch ((insn >> 22) & 3) {
1665
        case 0:
1666
            if (insn & (1 << 21))
1667
                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
1668
            else
1669
                gen_op_iwmmxt_maxub_M0_wRn(rd1);
1670
            break;
1671
        case 1:
1672
            if (insn & (1 << 21))
1673
                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
1674
            else
1675
                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
1676
            break;
1677
        case 2:
1678
            if (insn & (1 << 21))
1679
                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
1680
            else
1681
                gen_op_iwmmxt_maxul_M0_wRn(rd1);
1682
            break;
1683
        case 3:
1684
            return 1;
1685
        }
1686
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1687
        gen_op_iwmmxt_set_mup();
1688
        break;
1689
    case 0x002: case 0x102: case 0x202: case 0x302:        /* WALIGNI */
1690
    case 0x402: case 0x502: case 0x602: case 0x702:
1691
        wrd = (insn >> 12) & 0xf;
1692
        rd0 = (insn >> 16) & 0xf;
1693
        rd1 = (insn >> 0) & 0xf;
1694
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1695
        gen_op_movl_T0_im((insn >> 20) & 3);
1696
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
1697
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1698
        gen_op_iwmmxt_set_mup();
1699
        break;
1700
    case 0x01a: case 0x11a: case 0x21a: case 0x31a:        /* WSUB */
1701
    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
1702
    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
1703
    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
1704
        wrd = (insn >> 12) & 0xf;
1705
        rd0 = (insn >> 16) & 0xf;
1706
        rd1 = (insn >> 0) & 0xf;
1707
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1708
        switch ((insn >> 20) & 0xf) {
1709
        case 0x0:
1710
            gen_op_iwmmxt_subnb_M0_wRn(rd1);
1711
            break;
1712
        case 0x1:
1713
            gen_op_iwmmxt_subub_M0_wRn(rd1);
1714
            break;
1715
        case 0x3:
1716
            gen_op_iwmmxt_subsb_M0_wRn(rd1);
1717
            break;
1718
        case 0x4:
1719
            gen_op_iwmmxt_subnw_M0_wRn(rd1);
1720
            break;
1721
        case 0x5:
1722
            gen_op_iwmmxt_subuw_M0_wRn(rd1);
1723
            break;
1724
        case 0x7:
1725
            gen_op_iwmmxt_subsw_M0_wRn(rd1);
1726
            break;
1727
        case 0x8:
1728
            gen_op_iwmmxt_subnl_M0_wRn(rd1);
1729
            break;
1730
        case 0x9:
1731
            gen_op_iwmmxt_subul_M0_wRn(rd1);
1732
            break;
1733
        case 0xb:
1734
            gen_op_iwmmxt_subsl_M0_wRn(rd1);
1735
            break;
1736
        default:
1737
            return 1;
1738
        }
1739
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1740
        gen_op_iwmmxt_set_mup();
1741
        gen_op_iwmmxt_set_cup();
1742
        break;
1743
    case 0x01e: case 0x11e: case 0x21e: case 0x31e:        /* WSHUFH */
1744
    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
1745
    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
1746
    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
1747
        wrd = (insn >> 12) & 0xf;
1748
        rd0 = (insn >> 16) & 0xf;
1749
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1750
        gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
1751
        gen_op_iwmmxt_shufh_M0_T0();
1752
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1753
        gen_op_iwmmxt_set_mup();
1754
        gen_op_iwmmxt_set_cup();
1755
        break;
1756
    case 0x018: case 0x118: case 0x218: case 0x318:        /* WADD */
1757
    case 0x418: case 0x518: case 0x618: case 0x718:
1758
    case 0x818: case 0x918: case 0xa18: case 0xb18:
1759
    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
1760
        wrd = (insn >> 12) & 0xf;
1761
        rd0 = (insn >> 16) & 0xf;
1762
        rd1 = (insn >> 0) & 0xf;
1763
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1764
        switch ((insn >> 20) & 0xf) {
1765
        case 0x0:
1766
            gen_op_iwmmxt_addnb_M0_wRn(rd1);
1767
            break;
1768
        case 0x1:
1769
            gen_op_iwmmxt_addub_M0_wRn(rd1);
1770
            break;
1771
        case 0x3:
1772
            gen_op_iwmmxt_addsb_M0_wRn(rd1);
1773
            break;
1774
        case 0x4:
1775
            gen_op_iwmmxt_addnw_M0_wRn(rd1);
1776
            break;
1777
        case 0x5:
1778
            gen_op_iwmmxt_adduw_M0_wRn(rd1);
1779
            break;
1780
        case 0x7:
1781
            gen_op_iwmmxt_addsw_M0_wRn(rd1);
1782
            break;
1783
        case 0x8:
1784
            gen_op_iwmmxt_addnl_M0_wRn(rd1);
1785
            break;
1786
        case 0x9:
1787
            gen_op_iwmmxt_addul_M0_wRn(rd1);
1788
            break;
1789
        case 0xb:
1790
            gen_op_iwmmxt_addsl_M0_wRn(rd1);
1791
            break;
1792
        default:
1793
            return 1;
1794
        }
1795
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1796
        gen_op_iwmmxt_set_mup();
1797
        gen_op_iwmmxt_set_cup();
1798
        break;
1799
    case 0x008: case 0x108: case 0x208: case 0x308:        /* WPACK */
1800
    case 0x408: case 0x508: case 0x608: case 0x708:
1801
    case 0x808: case 0x908: case 0xa08: case 0xb08:
1802
    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
1803
        wrd = (insn >> 12) & 0xf;
1804
        rd0 = (insn >> 16) & 0xf;
1805
        rd1 = (insn >> 0) & 0xf;
1806
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1807
        if (!(insn & (1 << 20)))
1808
            return 1;
1809
        switch ((insn >> 22) & 3) {
1810
        case 0:
1811
            return 1;
1812
        case 1:
1813
            if (insn & (1 << 21))
1814
                gen_op_iwmmxt_packsw_M0_wRn(rd1);
1815
            else
1816
                gen_op_iwmmxt_packuw_M0_wRn(rd1);
1817
            break;
1818
        case 2:
1819
            if (insn & (1 << 21))
1820
                gen_op_iwmmxt_packsl_M0_wRn(rd1);
1821
            else
1822
                gen_op_iwmmxt_packul_M0_wRn(rd1);
1823
            break;
1824
        case 3:
1825
            if (insn & (1 << 21))
1826
                gen_op_iwmmxt_packsq_M0_wRn(rd1);
1827
            else
1828
                gen_op_iwmmxt_packuq_M0_wRn(rd1);
1829
            break;
1830
        }
1831
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1832
        gen_op_iwmmxt_set_mup();
1833
        gen_op_iwmmxt_set_cup();
1834
        break;
1835
    case 0x201: case 0x203: case 0x205: case 0x207:
1836
    case 0x209: case 0x20b: case 0x20d: case 0x20f:
1837
    case 0x211: case 0x213: case 0x215: case 0x217:
1838
    case 0x219: case 0x21b: case 0x21d: case 0x21f:
1839
        wrd = (insn >> 5) & 0xf;
1840
        rd0 = (insn >> 12) & 0xf;
1841
        rd1 = (insn >> 0) & 0xf;
1842
        if (rd0 == 0xf || rd1 == 0xf)
1843
            return 1;
1844
        gen_op_iwmmxt_movq_M0_wRn(wrd);
1845
        switch ((insn >> 16) & 0xf) {
1846
        case 0x0:                                        /* TMIA */
1847
            gen_movl_T0_reg(s, rd0);
1848
            gen_movl_T1_reg(s, rd1);
1849
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
1850
            break;
1851
        case 0x8:                                        /* TMIAPH */
1852
            gen_movl_T0_reg(s, rd0);
1853
            gen_movl_T1_reg(s, rd1);
1854
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
1855
            break;
1856
        case 0xc: case 0xd: case 0xe: case 0xf:                /* TMIAxy */
1857
            gen_movl_T1_reg(s, rd0);
1858
            if (insn & (1 << 16))
1859
                gen_op_shrl_T1_im(16);
1860
            gen_op_movl_T0_T1();
1861
            gen_movl_T1_reg(s, rd1);
1862
            if (insn & (1 << 17))
1863
                gen_op_shrl_T1_im(16);
1864
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
1865
            break;
1866
        default:
1867
            return 1;
1868
        }
1869
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1870
        gen_op_iwmmxt_set_mup();
1871
        break;
1872
    default:
1873
        return 1;
1874
    }
1875

    
1876
    return 0;
1877
}
1878

    
1879
/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
1880
   (ie. an undefined instruction).  */
1881
static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
1882
{
1883
    int acc, rd0, rd1, rdhi, rdlo;
1884

    
1885
    if ((insn & 0x0ff00f10) == 0x0e200010) {
1886
        /* Multiply with Internal Accumulate Format */
1887
        rd0 = (insn >> 12) & 0xf;
1888
        rd1 = insn & 0xf;
1889
        acc = (insn >> 5) & 7;
1890

    
1891
        if (acc != 0)
1892
            return 1;
1893

    
1894
        switch ((insn >> 16) & 0xf) {
1895
        case 0x0:                                        /* MIA */
1896
            gen_movl_T0_reg(s, rd0);
1897
            gen_movl_T1_reg(s, rd1);
1898
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
1899
            break;
1900
        case 0x8:                                        /* MIAPH */
1901
            gen_movl_T0_reg(s, rd0);
1902
            gen_movl_T1_reg(s, rd1);
1903
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
1904
            break;
1905
        case 0xc:                                        /* MIABB */
1906
        case 0xd:                                        /* MIABT */
1907
        case 0xe:                                        /* MIATB */
1908
        case 0xf:                                        /* MIATT */
1909
            gen_movl_T1_reg(s, rd0);
1910
            if (insn & (1 << 16))
1911
                gen_op_shrl_T1_im(16);
1912
            gen_op_movl_T0_T1();
1913
            gen_movl_T1_reg(s, rd1);
1914
            if (insn & (1 << 17))
1915
                gen_op_shrl_T1_im(16);
1916
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
1917
            break;
1918
        default:
1919
            return 1;
1920
        }
1921

    
1922
        gen_op_iwmmxt_movq_wRn_M0(acc);
1923
        return 0;
1924
    }
1925

    
1926
    if ((insn & 0x0fe00ff8) == 0x0c400000) {
1927
        /* Internal Accumulator Access Format */
1928
        rdhi = (insn >> 16) & 0xf;
1929
        rdlo = (insn >> 12) & 0xf;
1930
        acc = insn & 7;
1931

    
1932
        if (acc != 0)
1933
            return 1;
1934

    
1935
        if (insn & ARM_CP_RW_BIT) {                        /* MRA */
1936
            gen_op_iwmmxt_movl_T0_T1_wRn(acc);
1937
            gen_movl_reg_T0(s, rdlo);
1938
            gen_op_movl_T0_im((1 << (40 - 32)) - 1);
1939
            gen_op_andl_T0_T1();
1940
            gen_movl_reg_T0(s, rdhi);
1941
        } else {                                        /* MAR */
1942
            gen_movl_T0_reg(s, rdlo);
1943
            gen_movl_T1_reg(s, rdhi);
1944
            gen_op_iwmmxt_movl_wRn_T0_T1(acc);
1945
        }
1946
        return 0;
1947
    }
1948

    
1949
    return 1;
1950
}
1951

    
1952
/* Disassemble system coprocessor instruction.  Return nonzero if
1953
   instruction is not defined.  */
1954
static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
1955
{
1956
    uint32_t rd = (insn >> 12) & 0xf;
1957
    uint32_t cp = (insn >> 8) & 0xf;
1958
    if (IS_USER(s)) {
1959
        return 1;
1960
    }
1961

    
1962
    if (insn & ARM_CP_RW_BIT) {
1963
        if (!env->cp[cp].cp_read)
1964
            return 1;
1965
        gen_op_movl_T0_im((uint32_t) s->pc);
1966
        gen_set_pc_T0();
1967
        gen_op_movl_T0_cp(insn);
1968
        gen_movl_reg_T0(s, rd);
1969
    } else {
1970
        if (!env->cp[cp].cp_write)
1971
            return 1;
1972
        gen_op_movl_T0_im((uint32_t) s->pc);
1973
        gen_set_pc_T0();
1974
        gen_movl_T0_reg(s, rd);
1975
        gen_op_movl_cp_T0(insn);
1976
    }
1977
    return 0;
1978
}
1979

    
1980
static int cp15_user_ok(uint32_t insn)
1981
{
1982
    int cpn = (insn >> 16) & 0xf;
1983
    int cpm = insn & 0xf;
1984
    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
1985

    
1986
    if (cpn == 13 && cpm == 0) {
1987
        /* TLS register.  */
1988
        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
1989
            return 1;
1990
    }
1991
    if (cpn == 7) {
1992
        /* ISB, DSB, DMB.  */
1993
        if ((cpm == 5 && op == 4)
1994
                || (cpm == 10 && (op == 4 || op == 5)))
1995
            return 1;
1996
    }
1997
    return 0;
1998
}
1999

    
2000
/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
2001
   instruction is not defined.  */
2002
static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
2003
{
2004
    uint32_t rd;
2005

    
2006
    /* M profile cores use memory mapped registers instead of cp15.  */
2007
    if (arm_feature(env, ARM_FEATURE_M))
2008
        return 1;
2009

    
2010
    if ((insn & (1 << 25)) == 0) {
2011
        if (insn & (1 << 20)) {
2012
            /* mrrc */
2013
            return 1;
2014
        }
2015
        /* mcrr.  Used for block cache operations, so implement as no-op.  */
2016
        return 0;
2017
    }
2018
    if ((insn & (1 << 4)) == 0) {
2019
        /* cdp */
2020
        return 1;
2021
    }
2022
    if (IS_USER(s) && !cp15_user_ok(insn)) {
2023
        return 1;
2024
    }
2025
    if ((insn & 0x0fff0fff) == 0x0e070f90
2026
        || (insn & 0x0fff0fff) == 0x0e070f58) {
2027
        /* Wait for interrupt.  */
2028
        gen_op_movl_T0_im((long)s->pc);
2029
        gen_set_pc_T0();
2030
        s->is_jmp = DISAS_WFI;
2031
        return 0;
2032
    }
2033
    rd = (insn >> 12) & 0xf;
2034
    if (insn & ARM_CP_RW_BIT) {
2035
        gen_op_movl_T0_cp15(insn);
2036
        /* If the destination register is r15 then sets condition codes.  */
2037
        if (rd != 15)
2038
            gen_movl_reg_T0(s, rd);
2039
    } else {
2040
        gen_movl_T0_reg(s, rd);
2041
        gen_op_movl_cp15_T0(insn);
2042
        /* Normally we would always end the TB here, but Linux
2043
         * arch/arm/mach-pxa/sleep.S expects two instructions following
2044
         * an MMU enable to execute from cache.  Imitate this behaviour.  */
2045
        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
2046
                (insn & 0x0fff0fff) != 0x0e010f10)
2047
            gen_lookup_tb(s);
2048
    }
2049
    return 0;
2050
}
2051

    
2052
/* Shift x right by n bits when n is positive, otherwise left by -n bits.  */
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))

/* Extract a 5-bit single-precision register number from insn: the four
   bits starting at bigbit form bits 4:1 and the bit at smallbit is bit 0.
   All arguments are parenthesized so that expression arguments expand
   correctly.  */
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR((insn), (bigbit) - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))

/* Extract a double-precision register number from insn into reg.  With
   ARM_FEATURE_VFP3 the bit at smallbit supplies bit 4 of the number;
   without it that bit must be clear.  NOTE: this macro reads a variable
   named env from the enclosing scope and executes "return 1" in the
   enclosing function when the encoding is rejected, so it can only be used
   as a statement inside a function returning int.  */
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
        (reg) = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if ((insn) & (1 << (smallbit))) \
            return 1; \
        (reg) = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

/* Field positions for the D (bits 15:12 / 22), N (bits 19:16 / 7) and
   M (bits 3:0 / 5) register operands.  */
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2071

    
2072
static inline int
2073
vfp_enabled(CPUState * env)
2074
{
2075
    return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
2076
}
2077

    
2078
/* Disassemble a VFP instruction.  Returns nonzero if an error occured
2079
   (ie. an undefined instruction).  */
2080
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
2081
{
2082
    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2083
    int dp, veclen;
2084

    
2085
    if (!arm_feature(env, ARM_FEATURE_VFP))
2086
        return 1;
2087

    
2088
    if (!vfp_enabled(env)) {
2089
        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
2090
        if ((insn & 0x0fe00fff) != 0x0ee00a10)
2091
            return 1;
2092
        rn = (insn >> 16) & 0xf;
2093
        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2094
            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
2095
            return 1;
2096
    }
2097
    dp = ((insn & 0xf00) == 0xb00);
2098
    switch ((insn >> 24) & 0xf) {
2099
    case 0xe:
2100
        if (insn & (1 << 4)) {
2101
            /* single register transfer */
2102
            rd = (insn >> 12) & 0xf;
2103
            if (dp) {
2104
                int size;
2105
                int pass;
2106

    
2107
                VFP_DREG_N(rn, insn);
2108
                if (insn & 0xf)
2109
                    return 1;
2110
                if (insn & 0x00c00060
2111
                    && !arm_feature(env, ARM_FEATURE_NEON))
2112
                    return 1;
2113

    
2114
                pass = (insn >> 21) & 1;
2115
                if (insn & (1 << 22)) {
2116
                    size = 0;
2117
                    offset = ((insn >> 5) & 3) * 8;
2118
                } else if (insn & (1 << 5)) {
2119
                    size = 1;
2120
                    offset = (insn & (1 << 6)) ? 16 : 0;
2121
                } else {
2122
                    size = 2;
2123
                    offset = 0;
2124
                }
2125
                if (insn & ARM_CP_RW_BIT) {
2126
                    /* vfp->arm */
2127
                    switch (size) {
2128
                    case 0:
2129
                        NEON_GET_REG(T1, rn, pass);
2130
                        if (offset)
2131
                            gen_op_shrl_T1_im(offset);
2132
                        if (insn & (1 << 23))
2133
                            gen_uxtb(cpu_T[1]);
2134
                        else
2135
                            gen_sxtb(cpu_T[1]);
2136
                        break;
2137
                    case 1:
2138
                        NEON_GET_REG(T1, rn, pass);
2139
                        if (insn & (1 << 23)) {
2140
                            if (offset) {
2141
                                gen_op_shrl_T1_im(16);
2142
                            } else {
2143
                                gen_uxth(cpu_T[1]);
2144
                            }
2145
                        } else {
2146
                            if (offset) {
2147
                                gen_op_sarl_T1_im(16);
2148
                            } else {
2149
                                gen_sxth(cpu_T[1]);
2150
                            }
2151
                        }
2152
                        break;
2153
                    case 2:
2154
                        NEON_GET_REG(T1, rn, pass);
2155
                        break;
2156
                    }
2157
                    gen_movl_reg_T1(s, rd);
2158
                } else {
2159
                    /* arm->vfp */
2160
                    gen_movl_T0_reg(s, rd);
2161
                    if (insn & (1 << 23)) {
2162
                        /* VDUP */
2163
                        if (size == 0) {
2164
                            gen_op_neon_dup_u8(0);
2165
                        } else if (size == 1) {
2166
                            gen_op_neon_dup_low16();
2167
                        }
2168
                        NEON_SET_REG(T0, rn, 0);
2169
                        NEON_SET_REG(T0, rn, 1);
2170
                    } else {
2171
                        /* VMOV */
2172
                        switch (size) {
2173
                        case 0:
2174
                            NEON_GET_REG(T2, rn, pass);
2175
                            gen_op_movl_T1_im(0xff);
2176
                            gen_op_andl_T0_T1();
2177
                            gen_op_neon_insert_elt(offset, ~(0xff << offset));
2178
                            NEON_SET_REG(T2, rn, pass);
2179
                            break;
2180
                        case 1:
2181
                            NEON_GET_REG(T2, rn, pass);
2182
                            gen_op_movl_T1_im(0xffff);
2183
                            gen_op_andl_T0_T1();
2184
                            bank_mask = offset ? 0xffff : 0xffff0000;
2185
                            gen_op_neon_insert_elt(offset, bank_mask);
2186
                            NEON_SET_REG(T2, rn, pass);
2187
                            break;
2188
                        case 2:
2189
                            NEON_SET_REG(T0, rn, pass);
2190
                            break;
2191
                        }
2192
                    }
2193
                }
2194
            } else { /* !dp */
2195
                if ((insn & 0x6f) != 0x00)
2196
                    return 1;
2197
                rn = VFP_SREG_N(insn);
2198
                if (insn & ARM_CP_RW_BIT) {
2199
                    /* vfp->arm */
2200
                    if (insn & (1 << 21)) {
2201
                        /* system register */
2202
                        rn >>= 1;
2203

    
2204
                        switch (rn) {
2205
                        case ARM_VFP_FPSID:
2206
                            /* VFP2 allows access for FSID from userspace.
2207
                               VFP3 restricts all id registers to privileged
2208
                               accesses.  */
2209
                            if (IS_USER(s)
2210
                                && arm_feature(env, ARM_FEATURE_VFP3))
2211
                                return 1;
2212
                            gen_op_vfp_movl_T0_xreg(rn);
2213
                            break;
2214
                        case ARM_VFP_FPEXC:
2215
                            if (IS_USER(s))
2216
                                return 1;
2217
                            gen_op_vfp_movl_T0_xreg(rn);
2218
                            break;
2219
                        case ARM_VFP_FPINST:
2220
                        case ARM_VFP_FPINST2:
2221
                            /* Not present in VFP3.  */
2222
                            if (IS_USER(s)
2223
                                || arm_feature(env, ARM_FEATURE_VFP3))
2224
                                return 1;
2225
                            gen_op_vfp_movl_T0_xreg(rn);
2226
                            break;
2227
                        case ARM_VFP_FPSCR:
2228
                            if (rd == 15)
2229
                                gen_op_vfp_movl_T0_fpscr_flags();
2230
                            else
2231
                                gen_op_vfp_movl_T0_fpscr();
2232
                            break;
2233
                        case ARM_VFP_MVFR0:
2234
                        case ARM_VFP_MVFR1:
2235
                            if (IS_USER(s)
2236
                                || !arm_feature(env, ARM_FEATURE_VFP3))
2237
                                return 1;
2238
                            gen_op_vfp_movl_T0_xreg(rn);
2239
                            break;
2240
                        default:
2241
                            return 1;
2242
                        }
2243
                    } else {
2244
                        gen_mov_F0_vreg(0, rn);
2245
                        gen_op_vfp_mrs();
2246
                    }
2247
                    if (rd == 15) {
2248
                        /* Set the 4 flag bits in the CPSR.  */
2249
                        gen_op_movl_cpsr_T0(0xf0000000);
2250
                    } else
2251
                        gen_movl_reg_T0(s, rd);
2252
                } else {
2253
                    /* arm->vfp */
2254
                    gen_movl_T0_reg(s, rd);
2255
                    if (insn & (1 << 21)) {
2256
                        rn >>= 1;
2257
                        /* system register */
2258
                        switch (rn) {
2259
                        case ARM_VFP_FPSID:
2260
                        case ARM_VFP_MVFR0:
2261
                        case ARM_VFP_MVFR1:
2262
                            /* Writes are ignored.  */
2263
                            break;
2264
                        case ARM_VFP_FPSCR:
2265
                            gen_op_vfp_movl_fpscr_T0();
2266
                            gen_lookup_tb(s);
2267
                            break;
2268
                        case ARM_VFP_FPEXC:
2269
                            if (IS_USER(s))
2270
                                return 1;
2271
                            gen_op_vfp_movl_xreg_T0(rn);
2272
                            gen_lookup_tb(s);
2273
                            break;
2274
                        case ARM_VFP_FPINST:
2275
                        case ARM_VFP_FPINST2:
2276
                            gen_op_vfp_movl_xreg_T0(rn);
2277
                            break;
2278
                        default:
2279
                            return 1;
2280
                        }
2281
                    } else {
2282
                        gen_op_vfp_msr();
2283
                        gen_mov_vreg_F0(0, rn);
2284
                    }
2285
                }
2286
            }
2287
        } else {
2288
            /* data processing */
2289
            /* The opcode is in bits 23, 21, 20 and 6.  */
2290
            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2291
            if (dp) {
2292
                if (op == 15) {
2293
                    /* rn is opcode */
2294
                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2295
                } else {
2296
                    /* rn is register number */
2297
                    VFP_DREG_N(rn, insn);
2298
                }
2299

    
2300
                if (op == 15 && (rn == 15 || rn > 17)) {
2301
                    /* Integer or single precision destination.  */
2302
                    rd = VFP_SREG_D(insn);
2303
                } else {
2304
                    VFP_DREG_D(rd, insn);
2305
                }
2306

    
2307
                if (op == 15 && (rn == 16 || rn == 17)) {
2308
                    /* Integer source.  */
2309
                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
2310
                } else {
2311
                    VFP_DREG_M(rm, insn);
2312
                }
2313
            } else {
2314
                rn = VFP_SREG_N(insn);
2315
                if (op == 15 && rn == 15) {
2316
                    /* Double precision destination.  */
2317
                    VFP_DREG_D(rd, insn);
2318
                } else {
2319
                    rd = VFP_SREG_D(insn);
2320
                }
2321
                rm = VFP_SREG_M(insn);
2322
            }
2323

    
2324
            veclen = env->vfp.vec_len;
2325
            if (op == 15 && rn > 3)
2326
                veclen = 0;
2327

    
2328
            /* Shut up compiler warnings.  */
2329
            delta_m = 0;
2330
            delta_d = 0;
2331
            bank_mask = 0;
2332

    
2333
            if (veclen > 0) {
2334
                if (dp)
2335
                    bank_mask = 0xc;
2336
                else
2337
                    bank_mask = 0x18;
2338

    
2339
                /* Figure out what type of vector operation this is.  */
2340
                if ((rd & bank_mask) == 0) {
2341
                    /* scalar */
2342
                    veclen = 0;
2343
                } else {
2344
                    if (dp)
2345
                        delta_d = (env->vfp.vec_stride >> 1) + 1;
2346
                    else
2347
                        delta_d = env->vfp.vec_stride + 1;
2348

    
2349
                    if ((rm & bank_mask) == 0) {
2350
                        /* mixed scalar/vector */
2351
                        delta_m = 0;
2352
                    } else {
2353
                        /* vector */
2354
                        delta_m = delta_d;
2355
                    }
2356
                }
2357
            }
2358

    
2359
            /* Load the initial operands.  */
2360
            if (op == 15) {
2361
                switch (rn) {
2362
                case 16:
2363
                case 17:
2364
                    /* Integer source */
2365
                    gen_mov_F0_vreg(0, rm);
2366
                    break;
2367
                case 8:
2368
                case 9:
2369
                    /* Compare */
2370
                    gen_mov_F0_vreg(dp, rd);
2371
                    gen_mov_F1_vreg(dp, rm);
2372
                    break;
2373
                case 10:
2374
                case 11:
2375
                    /* Compare with zero */
2376
                    gen_mov_F0_vreg(dp, rd);
2377
                    gen_vfp_F1_ld0(dp);
2378
                    break;
2379
                case 20:
2380
                case 21:
2381
                case 22:
2382
                case 23:
2383
                    /* Source and destination the same.  */
2384
                    gen_mov_F0_vreg(dp, rd);
2385
                    break;
2386
                default:
2387
                    /* One source operand.  */
2388
                    gen_mov_F0_vreg(dp, rm);
2389
                    break;
2390
                }
2391
            } else {
2392
                /* Two source operands.  */
2393
                gen_mov_F0_vreg(dp, rn);
2394
                gen_mov_F1_vreg(dp, rm);
2395
            }
2396

    
2397
            for (;;) {
2398
                /* Perform the calculation.  */
2399
                switch (op) {
2400
                case 0: /* mac: fd + (fn * fm) */
2401
                    gen_vfp_mul(dp);
2402
                    gen_mov_F1_vreg(dp, rd);
2403
                    gen_vfp_add(dp);
2404
                    break;
2405
                case 1: /* nmac: fd - (fn * fm) */
2406
                    gen_vfp_mul(dp);
2407
                    gen_vfp_neg(dp);
2408
                    gen_mov_F1_vreg(dp, rd);
2409
                    gen_vfp_add(dp);
2410
                    break;
2411
                case 2: /* msc: -fd + (fn * fm) */
2412
                    gen_vfp_mul(dp);
2413
                    gen_mov_F1_vreg(dp, rd);
2414
                    gen_vfp_sub(dp);
2415
                    break;
2416
                case 3: /* nmsc: -fd - (fn * fm)  */
2417
                    gen_vfp_mul(dp);
2418
                    gen_mov_F1_vreg(dp, rd);
2419
                    gen_vfp_add(dp);
2420
                    gen_vfp_neg(dp);
2421
                    break;
2422
                case 4: /* mul: fn * fm */
2423
                    gen_vfp_mul(dp);
2424
                    break;
2425
                case 5: /* nmul: -(fn * fm) */
2426
                    gen_vfp_mul(dp);
2427
                    gen_vfp_neg(dp);
2428
                    break;
2429
                case 6: /* add: fn + fm */
2430
                    gen_vfp_add(dp);
2431
                    break;
2432
                case 7: /* sub: fn - fm */
2433
                    gen_vfp_sub(dp);
2434
                    break;
2435
                case 8: /* div: fn / fm */
2436
                    gen_vfp_div(dp);
2437
                    break;
2438
                case 14: /* fconst */
2439
                    if (!arm_feature(env, ARM_FEATURE_VFP3))
2440
                      return 1;
2441

    
2442
                    n = (insn << 12) & 0x80000000;
2443
                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
2444
                    if (dp) {
2445
                        if (i & 0x40)
2446
                            i |= 0x3f80;
2447
                        else
2448
                            i |= 0x4000;
2449
                        n |= i << 16;
2450
                    } else {
2451
                        if (i & 0x40)
2452
                            i |= 0x780;
2453
                        else
2454
                            i |= 0x800;
2455
                        n |= i << 19;
2456
                    }
2457
                    gen_vfp_fconst(dp, n);
2458
                    break;
2459
                case 15: /* extension space */
2460
                    switch (rn) {
2461
                    case 0: /* cpy */
2462
                        /* no-op */
2463
                        break;
2464
                    case 1: /* abs */
2465
                        gen_vfp_abs(dp);
2466
                        break;
2467
                    case 2: /* neg */
2468
                        gen_vfp_neg(dp);
2469
                        break;
2470
                    case 3: /* sqrt */
2471
                        gen_vfp_sqrt(dp);
2472
                        break;
2473
                    case 8: /* cmp */
2474
                        gen_vfp_cmp(dp);
2475
                        break;
2476
                    case 9: /* cmpe */
2477
                        gen_vfp_cmpe(dp);
2478
                        break;
2479
                    case 10: /* cmpz */
2480
                        gen_vfp_cmp(dp);
2481
                        break;
2482
                    case 11: /* cmpez */
2483
                        gen_vfp_F1_ld0(dp);
2484
                        gen_vfp_cmpe(dp);
2485
                        break;
2486
                    case 15: /* single<->double conversion */
2487
                        if (dp)
2488
                            gen_op_vfp_fcvtsd();
2489
                        else
2490
                            gen_op_vfp_fcvtds();
2491
                        break;
2492
                    case 16: /* fuito */
2493
                        gen_vfp_uito(dp);
2494
                        break;
2495
                    case 17: /* fsito */
2496
                        gen_vfp_sito(dp);
2497
                        break;
2498
                    case 20: /* fshto */
2499
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2500
                          return 1;
2501
                        gen_vfp_shto(dp, rm);
2502
                        break;
2503
                    case 21: /* fslto */
2504
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2505
                          return 1;
2506
                        gen_vfp_slto(dp, rm);
2507
                        break;
2508
                    case 22: /* fuhto */
2509
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2510
                          return 1;
2511
                        gen_vfp_uhto(dp, rm);
2512
                        break;
2513
                    case 23: /* fulto */
2514
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2515
                          return 1;
2516
                        gen_vfp_ulto(dp, rm);
2517
                        break;
2518
                    case 24: /* ftoui */
2519
                        gen_vfp_toui(dp);
2520
                        break;
2521
                    case 25: /* ftouiz */
2522
                        gen_vfp_touiz(dp);
2523
                        break;
2524
                    case 26: /* ftosi */
2525
                        gen_vfp_tosi(dp);
2526
                        break;
2527
                    case 27: /* ftosiz */
2528
                        gen_vfp_tosiz(dp);
2529
                        break;
2530
                    case 28: /* ftosh */
2531
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2532
                          return 1;
2533
                        gen_vfp_tosh(dp, rm);
2534
                        break;
2535
                    case 29: /* ftosl */
2536
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2537
                          return 1;
2538
                        gen_vfp_tosl(dp, rm);
2539
                        break;
2540
                    case 30: /* ftouh */
2541
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2542
                          return 1;
2543
                        gen_vfp_touh(dp, rm);
2544
                        break;
2545
                    case 31: /* ftoul */
2546
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
2547
                          return 1;
2548
                        gen_vfp_toul(dp, rm);
2549
                        break;
2550
                    default: /* undefined */
2551
                        printf ("rn:%d\n", rn);
2552
                        return 1;
2553
                    }
2554
                    break;
2555
                default: /* undefined */
2556
                    printf ("op:%d\n", op);
2557
                    return 1;
2558
                }
2559

    
2560
                /* Write back the result.  */
2561
                if (op == 15 && (rn >= 8 && rn <= 11))
2562
                    ; /* Comparison, do nothing.  */
2563
                else if (op == 15 && rn > 17)
2564
                    /* Integer result.  */
2565
                    gen_mov_vreg_F0(0, rd);
2566
                else if (op == 15 && rn == 15)
2567
                    /* conversion */
2568
                    gen_mov_vreg_F0(!dp, rd);
2569
                else
2570
                    gen_mov_vreg_F0(dp, rd);
2571

    
2572
                /* break out of the loop if we have finished  */
2573
                if (veclen == 0)
2574
                    break;
2575

    
2576
                if (op == 15 && delta_m == 0) {
2577
                    /* single source one-many */
2578
                    while (veclen--) {
2579
                        rd = ((rd + delta_d) & (bank_mask - 1))
2580
                             | (rd & bank_mask);
2581
                        gen_mov_vreg_F0(dp, rd);
2582
                    }
2583
                    break;
2584
                }
2585
                /* Setup the next operands.  */
2586
                veclen--;
2587
                rd = ((rd + delta_d) & (bank_mask - 1))
2588
                     | (rd & bank_mask);
2589

    
2590
                if (op == 15) {
2591
                    /* One source operand.  */
2592
                    rm = ((rm + delta_m) & (bank_mask - 1))
2593
                         | (rm & bank_mask);
2594
                    gen_mov_F0_vreg(dp, rm);
2595
                } else {
2596
                    /* Two source operands.  */
2597
                    rn = ((rn + delta_d) & (bank_mask - 1))
2598
                         | (rn & bank_mask);
2599
                    gen_mov_F0_vreg(dp, rn);
2600
                    if (delta_m) {
2601
                        rm = ((rm + delta_m) & (bank_mask - 1))
2602
                             | (rm & bank_mask);
2603
                        gen_mov_F1_vreg(dp, rm);
2604
                    }
2605
                }
2606
            }
2607
        }
2608
        break;
2609
    case 0xc:
2610
    case 0xd:
2611
        if (dp && (insn & 0x03e00000) == 0x00400000) {
2612
            /* two-register transfer */
2613
            rn = (insn >> 16) & 0xf;
2614
            rd = (insn >> 12) & 0xf;
2615
            if (dp) {
2616
                VFP_DREG_M(rm, insn);
2617
            } else {
2618
                rm = VFP_SREG_M(insn);
2619
            }
2620

    
2621
            if (insn & ARM_CP_RW_BIT) {
2622
                /* vfp->arm */
2623
                if (dp) {
2624
                    gen_mov_F0_vreg(1, rm);
2625
                    gen_op_vfp_mrrd();
2626
                    gen_movl_reg_T0(s, rd);
2627
                    gen_movl_reg_T1(s, rn);
2628
                } else {
2629
                    gen_mov_F0_vreg(0, rm);
2630
                    gen_op_vfp_mrs();
2631
                    gen_movl_reg_T0(s, rn);
2632
                    gen_mov_F0_vreg(0, rm + 1);
2633
                    gen_op_vfp_mrs();
2634
                    gen_movl_reg_T0(s, rd);
2635
                }
2636
            } else {
2637
                /* arm->vfp */
2638
                if (dp) {
2639
                    gen_movl_T0_reg(s, rd);
2640
                    gen_movl_T1_reg(s, rn);
2641
                    gen_op_vfp_mdrr();
2642
                    gen_mov_vreg_F0(1, rm);
2643
                } else {
2644
                    gen_movl_T0_reg(s, rn);
2645
                    gen_op_vfp_msr();
2646
                    gen_mov_vreg_F0(0, rm);
2647
                    gen_movl_T0_reg(s, rd);
2648
                    gen_op_vfp_msr();
2649
                    gen_mov_vreg_F0(0, rm + 1);
2650
                }
2651
            }
2652
        } else {
2653
            /* Load/store */
2654
            rn = (insn >> 16) & 0xf;
2655
            if (dp)
2656
                VFP_DREG_D(rd, insn);
2657
            else
2658
                rd = VFP_SREG_D(insn);
2659
            if (s->thumb && rn == 15) {
2660
                gen_op_movl_T1_im(s->pc & ~2);
2661
            } else {
2662
                gen_movl_T1_reg(s, rn);
2663
            }
2664
            if ((insn & 0x01200000) == 0x01000000) {
2665
                /* Single load/store */
2666
                offset = (insn & 0xff) << 2;
2667
                if ((insn & (1 << 23)) == 0)
2668
                    offset = -offset;
2669
                gen_op_addl_T1_im(offset);
2670
                if (insn & (1 << 20)) {
2671
                    gen_vfp_ld(s, dp);
2672
                    gen_mov_vreg_F0(dp, rd);
2673
                } else {
2674
                    gen_mov_F0_vreg(dp, rd);
2675
                    gen_vfp_st(s, dp);
2676
                }
2677
            } else {
2678
                /* load/store multiple */
2679
                if (dp)
2680
                    n = (insn >> 1) & 0x7f;
2681
                else
2682
                    n = insn & 0xff;
2683

    
2684
                if (insn & (1 << 24)) /* pre-decrement */
2685
                    gen_op_addl_T1_im(-((insn & 0xff) << 2));
2686

    
2687
                if (dp)
2688
                    offset = 8;
2689
                else
2690
                    offset = 4;
2691
                for (i = 0; i < n; i++) {
2692
                    if (insn & ARM_CP_RW_BIT) {
2693
                        /* load */
2694
                        gen_vfp_ld(s, dp);
2695
                        gen_mov_vreg_F0(dp, rd + i);
2696
                    } else {
2697
                        /* store */
2698
                        gen_mov_F0_vreg(dp, rd + i);
2699
                        gen_vfp_st(s, dp);
2700
                    }
2701
                    gen_op_addl_T1_im(offset);
2702
                }
2703
                if (insn & (1 << 21)) {
2704
                    /* writeback */
2705
                    if (insn & (1 << 24))
2706
                        offset = -offset * n;
2707
                    else if (dp && (insn & 1))
2708
                        offset = 4;
2709
                    else
2710
                        offset = 0;
2711

    
2712
                    if (offset != 0)
2713
                        gen_op_addl_T1_im(offset);
2714
                    gen_movl_reg_T1(s, rn);
2715
                }
2716
            }
2717
        }
2718
        break;
2719
    default:
2720
        /* Should never happen.  */
2721
        return 1;
2722
    }
2723
    return 0;
2724
}
2725

    
2726
/* Emit code that sets the PC to DEST and leaves the current TB.
   When DEST is in the same target page as the start of this TB, a
   tcg_gen_goto_tb(N) is emitted first and the exit value is the TB
   pointer plus N; otherwise the exit value is 0.  */
static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
{
    TranslationBlock *cur = s->tb;

    if ((cur->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        /* Destination is on a different page.  */
        gen_op_movl_T0_im(dest);
        gen_set_pc_T0();
        tcg_gen_exit_tb(0);
        return;
    }

    tcg_gen_goto_tb(n);
    gen_op_movl_T0_im(dest);
    gen_set_pc_T0();
    tcg_gen_exit_tb((long)cur + n);
}
2742

    
2743
/* Emit a jump to the constant address DEST.  Outside single-step mode this
   goes through gen_goto_tb() slot 0 and marks the TB as DISAS_TB_JUMP.  */
static inline void gen_jmp (DisasContext *s, uint32_t dest)
{
    if (!__builtin_expect(s->singlestep_enabled, 0)) {
        gen_goto_tb(s, 0, dest);
        s->is_jmp = DISAS_TB_JUMP;
        return;
    }

    /* An indirect jump so that we still trigger the debug exception.
       Bit 0 of the target is set when translating Thumb code.  */
    gen_op_movl_T0_im(s->thumb ? (dest | 1) : dest);
    gen_bx(s);
}
2756

    
2757
static inline void gen_mulxy(int x, int y)
2758
{
2759
    if (x)
2760
        tcg_gen_sari_i32(cpu_T[0], cpu_T[0], 16);
2761
    else
2762
        gen_sxth(cpu_T[0]);
2763
    if (y)
2764
        gen_op_sarl_T1_im(16);
2765
    else
2766
        gen_sxth(cpu_T[1]);
2767
    gen_op_mul_T0_T1();
2768
}
2769

    
2770
/* Return the mask of PSR bits set by a MSR instruction.  */
2771
static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
2772
    uint32_t mask;
2773

    
2774
    mask = 0;
2775
    if (flags & (1 << 0))
2776
        mask |= 0xff;
2777
    if (flags & (1 << 1))
2778
        mask |= 0xff00;
2779
    if (flags & (1 << 2))
2780
        mask |= 0xff0000;
2781
    if (flags & (1 << 3))
2782
        mask |= 0xff000000;
2783

    
2784
    /* Mask out undefined bits.  */
2785
    mask &= ~CPSR_RESERVED;
2786
    if (!arm_feature(env, ARM_FEATURE_V6))
2787
        mask &= ~(CPSR_E | CPSR_GE);
2788
    if (!arm_feature(env, ARM_FEATURE_THUMB2))
2789
        mask &= ~CPSR_IT;
2790
    /* Mask out execution state bits.  */
2791
    if (!spsr)
2792
        mask &= ~CPSR_EXEC;
2793
    /* Mask out privileged bits.  */
2794
    if (IS_USER(s))
2795
        mask &= CPSR_USER;
2796
    return mask;
2797
}
2798

    
2799
/* Returns nonzero if access to the PSR is not permitted.  */
2800
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
2801
{
2802
    if (spsr) {
2803
        /* ??? This is also undefined in system mode.  */
2804
        if (IS_USER(s))
2805
            return 1;
2806
        gen_op_movl_spsr_T0(mask);
2807
    } else {
2808
        gen_op_movl_cpsr_T0(mask);
2809
    }
2810
    gen_lookup_tb(s);
2811
    return 0;
2812
}
2813

    
2814
/* Generate an old-style exception return.  */
2815
static void gen_exception_return(DisasContext *s)
2816
{
2817
    gen_set_pc_T0();
2818
    gen_op_movl_T0_spsr();
2819
    gen_op_movl_cpsr_T0(0xffffffff);
2820
    s->is_jmp = DISAS_UPDATE;
2821
}
2822

    
2823
/* Generate a v6 exception return.  */
2824
static void gen_rfe(DisasContext *s)
2825
{
2826
    gen_op_movl_cpsr_T0(0xffffffff);
2827
    gen_op_movl_T0_T2();
2828
    gen_set_pc_T0();
2829
    s->is_jmp = DISAS_UPDATE;
2830
}
2831

    
2832
static inline void
2833
gen_set_condexec (DisasContext *s)
2834
{
2835
    if (s->condexec_mask) {
2836
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
2837
        TCGv tmp = new_tmp();
2838
        tcg_gen_movi_i32(tmp, val);
2839
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, condexec_bits));
2840
        dead_tmp(tmp);
2841
    }
2842
}
2843

    
2844
/* Translate a hint instruction with hint number VAL.  Only wfi (3)
   generates code; every other value is treated as a nop.  */
static void gen_nop_hint(DisasContext *s, int val)
{
    if (val == 3) {
        /* wfi */
        gen_op_movl_T0_im((long)s->pc);
        gen_set_pc_T0();
        s->is_jmp = DISAS_WFI;
    }
    /* wfe (2) and sev (4) fall in with the plain nops.
       TODO: Implement SEV and WFE.  May help SMP performance.  */
}
2859

    
2860
/* Neon shift by constant.  The actual ops are the same as used for variable
2861
   shifts.  [OP][U][SIZE]  */
2862
static GenOpFunc *gen_neon_shift_im[8][2][4] = {
2863
    { /* 0 */ /* VSHR */
2864
      {
2865
        gen_op_neon_shl_u8,
2866
        gen_op_neon_shl_u16,
2867
        gen_op_neon_shl_u32,
2868
        gen_op_neon_shl_u64
2869
      }, {
2870
        gen_op_neon_shl_s8,
2871
        gen_op_neon_shl_s16,
2872
        gen_op_neon_shl_s32,
2873
        gen_op_neon_shl_s64
2874
      }
2875
    }, { /* 1 */ /* VSRA */
2876
      {
2877
        gen_op_neon_shl_u8,
2878
        gen_op_neon_shl_u16,
2879
        gen_op_neon_shl_u32,
2880
        gen_op_neon_shl_u64
2881
      }, {
2882
        gen_op_neon_shl_s8,
2883
        gen_op_neon_shl_s16,
2884
        gen_op_neon_shl_s32,
2885
        gen_op_neon_shl_s64
2886
      }
2887
    }, { /* 2 */ /* VRSHR */
2888
      {
2889
        gen_op_neon_rshl_u8,
2890
        gen_op_neon_rshl_u16,
2891
        gen_op_neon_rshl_u32,
2892
        gen_op_neon_rshl_u64
2893
      }, {
2894
        gen_op_neon_rshl_s8,
2895
        gen_op_neon_rshl_s16,
2896
        gen_op_neon_rshl_s32,
2897
        gen_op_neon_rshl_s64
2898
      }
2899
    }, { /* 3 */ /* VRSRA */
2900
      {
2901
        gen_op_neon_rshl_u8,
2902
        gen_op_neon_rshl_u16,
2903
        gen_op_neon_rshl_u32,
2904
        gen_op_neon_rshl_u64
2905
      }, {
2906
        gen_op_neon_rshl_s8,
2907
        gen_op_neon_rshl_s16,
2908
        gen_op_neon_rshl_s32,
2909
        gen_op_neon_rshl_s64
2910
      }
2911
    }, { /* 4 */
2912
      {
2913
        NULL, NULL, NULL, NULL
2914
      }, { /* VSRI */
2915
        gen_op_neon_shl_u8,
2916
        gen_op_neon_shl_u16,
2917
        gen_op_neon_shl_u32,
2918
        gen_op_neon_shl_u64,
2919
      }
2920
    }, { /* 5 */
2921
      { /* VSHL */
2922
        gen_op_neon_shl_u8,
2923
        gen_op_neon_shl_u16,
2924
        gen_op_neon_shl_u32,
2925
        gen_op_neon_shl_u64,
2926
      }, { /* VSLI */
2927
        gen_op_neon_shl_u8,
2928
        gen_op_neon_shl_u16,
2929
        gen_op_neon_shl_u32,
2930
        gen_op_neon_shl_u64,
2931
      }
2932
    }, { /* 6 */ /* VQSHL */
2933
      {
2934
        gen_op_neon_qshl_u8,
2935
        gen_op_neon_qshl_u16,
2936
        gen_op_neon_qshl_u32,
2937
        gen_op_neon_qshl_u64
2938
      }, {
2939
        gen_op_neon_qshl_s8,
2940
        gen_op_neon_qshl_s16,
2941
        gen_op_neon_qshl_s32,
2942
        gen_op_neon_qshl_s64
2943
      }
2944
    }, { /* 7 */ /* VQSHLU */
2945
      {
2946
        gen_op_neon_qshl_u8,
2947
        gen_op_neon_qshl_u16,
2948
        gen_op_neon_qshl_u32,
2949
        gen_op_neon_qshl_u64
2950
      }, {
2951
        gen_op_neon_qshl_u8,
2952
        gen_op_neon_qshl_u16,
2953
        gen_op_neon_qshl_u32,
2954
        gen_op_neon_qshl_u64
2955
      }
2956
    }
2957
};
2958

    
2959
/* [R][U][size - 1] */
2960
static GenOpFunc *gen_neon_shift_im_narrow[2][2][3] = {
2961
    {
2962
      {
2963
        gen_op_neon_shl_u16,
2964
        gen_op_neon_shl_u32,
2965
        gen_op_neon_shl_u64
2966
      }, {
2967
        gen_op_neon_shl_s16,
2968
        gen_op_neon_shl_s32,
2969
        gen_op_neon_shl_s64
2970
      }
2971
    }, {
2972
      {
2973
        gen_op_neon_rshl_u16,
2974
        gen_op_neon_rshl_u32,
2975
        gen_op_neon_rshl_u64
2976
      }, {
2977
        gen_op_neon_rshl_s16,
2978
        gen_op_neon_rshl_s32,
2979
        gen_op_neon_rshl_s64
2980
      }
2981
    }
2982
};
2983

    
2984
/* Placeholder for the 32-bit entry of gen_neon_narrow[]: it emits no code.
   Declared with a (void) prototype, like gen_op_neon_widen_u32(), so that
   calls with arguments are rejected by the compiler.  */
static inline void
gen_op_neon_narrow_u32 (void)
{
    /* No-op.  */
}
2989

    
2990
static GenOpFunc *gen_neon_narrow[3] = {
2991
    gen_op_neon_narrow_u8,
2992
    gen_op_neon_narrow_u16,
2993
    gen_op_neon_narrow_u32
2994
};
2995

    
2996
static GenOpFunc *gen_neon_narrow_satu[3] = {
2997
    gen_op_neon_narrow_sat_u8,
2998
    gen_op_neon_narrow_sat_u16,
2999
    gen_op_neon_narrow_sat_u32
3000
};
3001

    
3002
static GenOpFunc *gen_neon_narrow_sats[3] = {
3003
    gen_op_neon_narrow_sat_s8,
3004
    gen_op_neon_narrow_sat_s16,
3005
    gen_op_neon_narrow_sat_s32
3006
};
3007

    
3008
/* Emit the add op for element size SIZE (0, 1 or 2).
   Returns 0 on success, 1 (emitting nothing) for any other SIZE.  */
static inline int gen_neon_add(int size)
{
    if (size == 0) {
        gen_op_neon_add_u8();
    } else if (size == 1) {
        gen_op_neon_add_u16();
    } else if (size == 2) {
        gen_op_addl_T0_T1();
    } else {
        return 1;
    }
    return 0;
}
3018

    
3019
/* 32-bit pairwise ops end up the same as the elementwise versions, so the
   pairwise names are simply aliases.  */
#define gen_op_neon_pmax_s32 gen_op_neon_max_s32
#define gen_op_neon_pmin_s32 gen_op_neon_min_s32
#define gen_op_neon_pmax_u32 gen_op_neon_max_u32
#define gen_op_neon_pmin_u32 gen_op_neon_min_u32
3024

    
3025
/* Emit gen_op_neon_<name>_{s,u}{8,16,32}(), selected by the variables
   `size' and `u' of the enclosing function.  For any other combination
   the macro makes the enclosing function return 1.  */
#define GEN_NEON_INTEGER_OP(name)                       \
    do {                                                \
        switch ((size << 1) | u) {                      \
        case 0: gen_op_neon_##name##_s8(); break;       \
        case 1: gen_op_neon_##name##_u8(); break;       \
        case 2: gen_op_neon_##name##_s16(); break;      \
        case 3: gen_op_neon_##name##_u16(); break;      \
        case 4: gen_op_neon_##name##_s32(); break;      \
        case 5: gen_op_neon_##name##_u32(); break;      \
        default: return 1;                              \
        }                                               \
    } while (0)
3035

    
3036
static inline void
3037
gen_neon_movl_scratch_T0(int scratch)
3038
{
3039
  uint32_t offset;
3040

    
3041
  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3042
  gen_op_neon_setreg_T0(offset);
3043
}
3044

    
3045
static inline void
3046
gen_neon_movl_scratch_T1(int scratch)
3047
{
3048
  uint32_t offset;
3049

    
3050
  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3051
  gen_op_neon_setreg_T1(offset);
3052
}
3053

    
3054
static inline void
3055
gen_neon_movl_T0_scratch(int scratch)
3056
{
3057
  uint32_t offset;
3058

    
3059
  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3060
  gen_op_neon_getreg_T0(offset);
3061
}
3062

    
3063
static inline void
3064
gen_neon_movl_T1_scratch(int scratch)
3065
{
3066
  uint32_t offset;
3067

    
3068
  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3069
  gen_op_neon_getreg_T1(offset);
3070
}
3071

    
3072
/* 32-bit member of the widen_u* family: the only code emitted is a load
   of the constant 0 into T1.  */
static inline void gen_op_neon_widen_u32(void)
{
    gen_op_movl_T1_im(0);
}
3076

    
3077
static inline void gen_neon_get_scalar(int size, int reg)
3078
{
3079
    if (size == 1) {
3080
        NEON_GET_REG(T0, reg >> 1, reg & 1);
3081
    } else {
3082
        NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
3083
        if (reg & 1)
3084
            gen_op_neon_dup_low16();
3085
        else
3086
            gen_op_neon_dup_high16();
3087
    }
3088
}
3089

    
3090
static void gen_neon_unzip(int reg, int q, int tmp, int size)
3091
{
3092
    int n;
3093

    
3094
    for (n = 0; n < q + 1; n += 2) {
3095
        NEON_GET_REG(T0, reg, n);
3096
        NEON_GET_REG(T0, reg, n + n);
3097
        switch (size) {
3098
        case 0: gen_op_neon_unzip_u8(); break;
3099
        case 1: gen_op_neon_zip_u16(); break; /* zip and unzip are the same.  */
3100
        case 2: /* no-op */; break;
3101
        default: abort();
3102
        }
3103
        gen_neon_movl_scratch_T0(tmp + n);
3104
        gen_neon_movl_scratch_T1(tmp + n + 1);
3105
    }
3106
}
3107

    
3108
/* Parameters of the "load/store all elements" forms, indexed by the op
   field (insn bits [11:8], values 0..10) in disas_neon_ls_insn().  */
static struct {
    int nregs;      /* number of registers transferred */
    int interleave; /* interleave factor */
    int spacing;    /* increment of the register number between registers */
} neon_ls_element_type[11] = {
    /*  0 */ {4, 4, 1},
    /*  1 */ {4, 4, 2},
    /*  2 */ {4, 1, 1},
    /*  3 */ {4, 2, 1},
    /*  4 */ {3, 3, 1},
    /*  5 */ {3, 3, 2},
    /*  6 */ {3, 1, 1},
    /*  7 */ {1, 1, 1},
    /*  8 */ {2, 2, 1},
    /*  9 */ {2, 2, 2},
    /* 10 */ {2, 1, 1}
};
3125

    
3126
/* Translate a NEON load/store element instruction.  Return nonzero if the
3127
   instruction is invalid.  */
3128
static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3129
{
3130
    int rd, rn, rm;
3131
    int op;
3132
    int nregs;
3133
    int interleave;
3134
    int stride;
3135
    int size;
3136
    int reg;
3137
    int pass;
3138
    int load;
3139
    int shift;
3140
    uint32_t mask;
3141
    int n;
3142

    
3143
    if (!vfp_enabled(env))
3144
      return 1;
3145
    VFP_DREG_D(rd, insn);
3146
    rn = (insn >> 16) & 0xf;
3147
    rm = insn & 0xf;
3148
    load = (insn & (1 << 21)) != 0;
3149
    if ((insn & (1 << 23)) == 0) {
3150
        /* Load store all elements.  */
3151
        op = (insn >> 8) & 0xf;
3152
        size = (insn >> 6) & 3;
3153
        if (op > 10 || size == 3)
3154
            return 1;
3155
        nregs = neon_ls_element_type[op].nregs;
3156
        interleave = neon_ls_element_type[op].interleave;
3157
        gen_movl_T1_reg(s, rn);
3158
        stride = (1 << size) * interleave;
3159
        for (reg = 0; reg < nregs; reg++) {
3160
            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
3161
                gen_movl_T1_reg(s, rn);
3162
                gen_op_addl_T1_im((1 << size) * reg);
3163
            } else if (interleave == 2 && nregs == 4 && reg == 2) {
3164
                gen_movl_T1_reg(s, rn);
3165
                gen_op_addl_T1_im(1 << size);
3166
            }
3167
            for (pass = 0; pass < 2; pass++) {
3168
                if (size == 2) {
3169
                    if (load) {
3170
                        gen_ldst(ldl, s);
3171
                        NEON_SET_REG(T0, rd, pass);
3172
                    } else {
3173
                        NEON_GET_REG(T0, rd, pass);
3174
                        gen_ldst(stl, s);
3175
                    }
3176
                    gen_op_addl_T1_im(stride);
3177
                } else if (size == 1) {
3178
                    if (load) {
3179
                        gen_ldst(lduw, s);
3180
                        gen_op_addl_T1_im(stride);
3181
                        gen_op_movl_T2_T0();
3182
                        gen_ldst(lduw, s);
3183
                        gen_op_addl_T1_im(stride);
3184
                        gen_op_neon_insert_elt(16, 0xffff);
3185
                        NEON_SET_REG(T2, rd, pass);
3186
                    } else {
3187
                        NEON_GET_REG(T2, rd, pass);
3188
                        gen_op_movl_T0_T2();
3189
                        gen_ldst(stw, s);
3190
                        gen_op_addl_T1_im(stride);
3191
                        gen_op_neon_extract_elt(16, 0xffff0000);
3192
                        gen_ldst(stw, s);
3193
                        gen_op_addl_T1_im(stride);
3194
                    }
3195
                } else /* size == 0 */ {
3196
                    if (load) {
3197
                        mask = 0xff;
3198
                        for (n = 0; n < 4; n++) {
3199
                            gen_ldst(ldub, s);
3200
                            gen_op_addl_T1_im(stride);
3201
                            if (n == 0) {
3202
                                gen_op_movl_T2_T0();
3203
                            } else {
3204
                                gen_op_neon_insert_elt(n * 8, ~mask);
3205
                            }
3206
                            mask <<= 8;
3207
                        }
3208
                        NEON_SET_REG(T2, rd, pass);
3209
                    } else {
3210
                        NEON_GET_REG(T2, rd, pass);
3211
                        mask = 0xff;
3212
                        for (n = 0; n < 4; n++) {
3213
                            if (n == 0) {
3214
                                gen_op_movl_T0_T2();
3215
                            } else {
3216
                                gen_op_neon_extract_elt(n * 8, mask);
3217
                            }
3218
                            gen_ldst(stb, s);
3219
                            gen_op_addl_T1_im(stride);
3220
                            mask <<= 8;
3221
                        }
3222
                    }
3223
                }
3224
            }
3225
            rd += neon_ls_element_type[op].spacing;
3226
        }
3227
        stride = nregs * 8;
3228
    } else {
3229
        size = (insn >> 10) & 3;
3230
        if (size == 3) {
3231
            /* Load single element to all lanes.  */
3232
            if (!load)
3233
                return 1;
3234
            size = (insn >> 6) & 3;
3235
            nregs = ((insn >> 8) & 3) + 1;
3236
            stride = (insn & (1 << 5)) ? 2 : 1;
3237
            gen_movl_T1_reg(s, rn);
3238
            for (reg = 0; reg < nregs; reg++) {
3239
                switch (size) {
3240
                case 0:
3241
                    gen_ldst(ldub, s);
3242
                    gen_op_neon_dup_u8(0);
3243
                    break;
3244
                case 1:
3245
                    gen_ldst(lduw, s);
3246
                    gen_op_neon_dup_low16();
3247
                    break;
3248
                case 2:
3249
                    gen_ldst(ldl, s);
3250
                    break;
3251
                case 3:
3252
                    return 1;
3253
                }
3254
                gen_op_addl_T1_im(1 << size);
3255
                NEON_SET_REG(T0, rd, 0);
3256
                NEON_SET_REG(T0, rd, 1);
3257
                rd += stride;
3258
            }
3259
            stride = (1 << size) * nregs;
3260
        } else {
3261
            /* Single element.  */
3262
            pass = (insn >> 7) & 1;
3263
            switch (size) {
3264
            case 0:
3265
                shift = ((insn >> 5) & 3) * 8;
3266
                mask = 0xff << shift;
3267
                stride = 1;
3268
                break;
3269
            case 1:
3270
                shift = ((insn >> 6) & 1) * 16;
3271
                mask = shift ? 0xffff0000 : 0xffff;
3272
                stride = (insn & (1 << 5)) ? 2 : 1;
3273
                break;
3274
            case 2:
3275
                shift = 0;
3276
                mask = 0xffffffff;
3277
                stride = (insn & (1 << 6)) ? 2 : 1;
3278
                break;
3279
            default:
3280
                abort();
3281
            }
3282
            nregs = ((insn >> 8) & 3) + 1;
3283
            gen_movl_T1_reg(s, rn);
3284
            for (reg = 0; reg < nregs; reg++) {
3285
                if (load) {
3286
                    if (size != 2) {
3287
                        NEON_GET_REG(T2, rd, pass);
3288
                    }
3289
                    switch (size) {
3290
                    case 0:
3291
                        gen_ldst(ldub, s);
3292
                        break;
3293
                    case 1:
3294
                        gen_ldst(lduw, s);
3295
                        break;
3296
                    case 2:
3297
                        gen_ldst(ldl, s);
3298
                        NEON_SET_REG(T0, rd, pass);
3299
                        break;
3300
                    }
3301
                    if (size != 2) {
3302
                        gen_op_neon_insert_elt(shift, ~mask);
3303
                        NEON_SET_REG(T0, rd, pass);
3304
                    }
3305
                } else { /* Store */
3306
                    if (size == 2) {
3307
                        NEON_GET_REG(T0, rd, pass);
3308
                    } else {
3309
                        NEON_GET_REG(T2, rd, pass);
3310
                        gen_op_neon_extract_elt(shift, mask);
3311
                    }
3312
                    switch (size) {
3313
                    case 0:
3314
                        gen_ldst(stb, s);
3315
                        break;
3316
                    case 1:
3317
                        gen_ldst(stw, s);
3318
                        break;
3319
                    case 2:
3320
                        gen_ldst(stl, s);
3321
                        break;
3322
                    }
3323
                }
3324
                rd += stride;
3325
                gen_op_addl_T1_im(1 << size);
3326
            }
3327
            stride = nregs * (1 << size);
3328
        }
3329
    }
3330
    if (rm != 15) {
3331
        TCGv base;
3332

    
3333
        base = load_reg(s, rn);
3334
        if (rm == 13) {
3335
            tcg_gen_addi_i32(base, base, stride);
3336
        } else {
3337
            TCGv index;
3338
            index = load_reg(s, rm);
3339
            tcg_gen_add_i32(base, base, index);
3340
            dead_tmp(index);
3341
        }
3342
        store_reg(s, rn, base);
3343
    }
3344
    return 0;
3345
}
3346

    
3347
/* Translate a NEON data processing instruction.  Return nonzero if the
3348
   instruction is invalid.
3349
   In general we process vectors in 32-bit chunks.  This means we can reuse
3350
   some of the scalar ops, and hopefully the code generated for 32-bit
3351
   hosts won't be too awful.  The downside is that the few 64-bit operations
3352
   (mainly shifts) get complicated.  */
3353

    
3354
static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
3355
{
3356
    int op;
3357
    int q;
3358
    int rd, rn, rm;
3359
    int size;
3360
    int shift;
3361
    int pass;
3362
    int count;
3363
    int pairwise;
3364
    int u;
3365
    int n;
3366
    uint32_t imm;
3367

    
3368
    if (!vfp_enabled(env))
3369
      return 1;
3370
    q = (insn & (1 << 6)) != 0;
3371
    u = (insn >> 24) & 1;
3372
    VFP_DREG_D(rd, insn);
3373
    VFP_DREG_N(rn, insn);
3374
    VFP_DREG_M(rm, insn);
3375
    size = (insn >> 20) & 3;
3376
    if ((insn & (1 << 23)) == 0) {
3377
        /* Three register same length.  */
3378
        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
3379
        if (size == 3 && (op == 1 || op == 5 || op == 16)) {
3380
            for (pass = 0; pass < (q ? 2 : 1); pass++) {
3381
                NEON_GET_REG(T0, rm, pass * 2);
3382
                NEON_GET_REG(T1, rm, pass * 2 + 1);
3383
                gen_neon_movl_scratch_T0(0);
3384
                gen_neon_movl_scratch_T1(1);
3385
                NEON_GET_REG(T0, rn, pass * 2);
3386
                NEON_GET_REG(T1, rn, pass * 2 + 1);
3387
                switch (op) {
3388
                case 1: /* VQADD */
3389
                    if (u) {
3390
                        gen_op_neon_addl_saturate_u64();
3391
                    } else {
3392
                        gen_op_neon_addl_saturate_s64();
3393
                    }
3394
                    break;
3395
                case 5: /* VQSUB */
3396
                    if (u) {
3397
                        gen_op_neon_subl_saturate_u64();
3398
                    } else {
3399
                        gen_op_neon_subl_saturate_s64();
3400
                    }
3401
                    break;
3402
                case 16:
3403
                    if (u) {
3404
                        gen_op_neon_subl_u64();
3405
                    } else {
3406
                        gen_op_neon_addl_u64();
3407
                    }
3408
                    break;
3409
                default:
3410
                    abort();
3411
                }
3412
                NEON_SET_REG(T0, rd, pass * 2);
3413
                NEON_SET_REG(T1, rd, pass * 2 + 1);
3414
            }
3415
            return 0;
3416
        }
3417
        switch (op) {
3418
        case 8: /* VSHL */
3419
        case 9: /* VQSHL */
3420
        case 10: /* VRSHL */
3421
        case 11: /* VQSHL */
3422
            /* Shift operations have Rn and Rm reversed.  */
3423
            {
3424
                int tmp;
3425
                tmp = rn;
3426
                rn = rm;
3427
                rm = tmp;
3428
                pairwise = 0;
3429
            }
3430
            break;
3431
        case 20: /* VPMAX */
3432
        case 21: /* VPMIN */
3433
        case 23: /* VPADD */
3434
            pairwise = 1;
3435
            break;
3436
        case 26: /* VPADD (float) */
3437
            pairwise = (u && size < 2);
3438
            break;
3439
        case 30: /* VPMIN/VPMAX (float) */
3440
            pairwise = u;
3441
            break;
3442
        default:
3443
            pairwise = 0;
3444
            break;
3445
        }
3446
        for (pass = 0; pass < (q ? 4 : 2); pass++) {
3447

    
3448
        if (pairwise) {
3449
            /* Pairwise.  */
3450
            if (q)
3451
                n = (pass & 1) * 2;
3452
            else
3453
                n = 0;
3454
            if (pass < q + 1) {
3455
                NEON_GET_REG(T0, rn, n);
3456
                NEON_GET_REG(T1, rn, n + 1);
3457
            } else {
3458
                NEON_GET_REG(T0, rm, n);
3459
                NEON_GET_REG(T1, rm, n + 1);
3460
            }
3461
        } else {
3462
            /* Elementwise.  */
3463
            NEON_GET_REG(T0, rn, pass);
3464
            NEON_GET_REG(T1, rm, pass);
3465
        }
3466
        switch (op) {
3467
        case 0: /* VHADD */
3468
            GEN_NEON_INTEGER_OP(hadd);
3469
            break;
3470
        case 1: /* VQADD */
3471
            switch (size << 1| u) {
3472
            case 0: gen_op_neon_qadd_s8(); break;
3473
            case 1: gen_op_neon_qadd_u8(); break;
3474
            case 2: gen_op_neon_qadd_s16(); break;
3475
            case 3: gen_op_neon_qadd_u16(); break;
3476
            case 4: gen_op_addl_T0_T1_saturate(); break;
3477
            case 5: gen_op_addl_T0_T1_usaturate(); break;
3478
            default: abort();
3479
            }
3480
            break;
3481
        case 2: /* VRHADD */
3482
            GEN_NEON_INTEGER_OP(rhadd);
3483
            break;
3484
        case 3: /* Logic ops.  */
3485
            switch ((u << 2) | size) {
3486
            case 0: /* VAND */
3487
                gen_op_andl_T0_T1();
3488
                break;
3489
            case 1: /* BIC */
3490
                gen_op_bicl_T0_T1();
3491
                break;
3492
            case 2: /* VORR */
3493
                gen_op_orl_T0_T1();
3494
                break;
3495
            case 3: /* VORN */
3496
                gen_op_notl_T1();
3497
                gen_op_orl_T0_T1();
3498
                break;
3499
            case 4: /* VEOR */
3500
                gen_op_xorl_T0_T1();
3501
                break;
3502
            case 5: /* VBSL */
3503
                NEON_GET_REG(T2, rd, pass);
3504
                gen_op_neon_bsl();
3505
                break;
3506
            case 6: /* VBIT */
3507
                NEON_GET_REG(T2, rd, pass);
3508
                gen_op_neon_bit();
3509
                break;
3510
            case 7: /* VBIF */
3511
                NEON_GET_REG(T2, rd, pass);
3512
                gen_op_neon_bif();
3513
                break;
3514
            }
3515
            break;
3516
        case 4: /* VHSUB */
3517
            GEN_NEON_INTEGER_OP(hsub);
3518
            break;
3519
        case 5: /* VQSUB */
3520
            switch ((size << 1) | u) {
3521
            case 0: gen_op_neon_qsub_s8(); break;
3522
            case 1: gen_op_neon_qsub_u8(); break;
3523
            case 2: gen_op_neon_qsub_s16(); break;
3524
            case 3: gen_op_neon_qsub_u16(); break;
3525
            case 4: gen_op_subl_T0_T1_saturate(); break;
3526
            case 5: gen_op_subl_T0_T1_usaturate(); break;
3527
            default: abort();
3528
            }
3529
            break;
3530
        case 6: /* VCGT */
3531
            GEN_NEON_INTEGER_OP(cgt);
3532
            break;
3533
        case 7: /* VCGE */
3534
            GEN_NEON_INTEGER_OP(cge);
3535
            break;
3536
        case 8: /* VSHL */
3537
            switch ((size << 1) | u) {
3538
            case 0: gen_op_neon_shl_s8(); break;
3539
            case 1: gen_op_neon_shl_u8(); break;
3540
            case 2: gen_op_neon_shl_s16(); break;
3541
            case 3: gen_op_neon_shl_u16(); break;
3542
            case 4: gen_op_neon_shl_s32(); break;
3543
            case 5: gen_op_neon_shl_u32(); break;
3544
#if 0
3545
            /* ??? Implementing these is tricky because the vector ops work
3546
               on 32-bit pieces.  */
3547
            case 6: gen_op_neon_shl_s64(); break;
3548
            case 7: gen_op_neon_shl_u64(); break;
3549
#else
3550
            case 6: case 7: cpu_abort(env, "VSHL.64 not implemented");
3551
#endif
3552
            }
3553
            break;
3554
        case 9: /* VQSHL */
3555
            switch ((size << 1) | u) {
3556
            case 0: gen_op_neon_qshl_s8(); break;
3557
            case 1: gen_op_neon_qshl_u8(); break;
3558
            case 2: gen_op_neon_qshl_s16(); break;
3559
            case 3: gen_op_neon_qshl_u16(); break;
3560
            case 4: gen_op_neon_qshl_s32(); break;
3561
            case 5: gen_op_neon_qshl_u32(); break;
3562
#if 0
3563
            /* ??? Implementing these is tricky because the vector ops work
3564
               on 32-bit pieces.  */
3565
            case 6: gen_op_neon_qshl_s64(); break;
3566
            case 7: gen_op_neon_qshl_u64(); break;
3567
#else
3568
            case 6: case 7: cpu_abort(env, "VQSHL.64 not implemented");
3569
#endif
3570
            }
3571
            break;
3572
        case 10: /* VRSHL */
3573
            switch ((size << 1) | u) {
3574
            case 0: gen_op_neon_rshl_s8(); break;
3575
            case 1: gen_op_neon_rshl_u8(); break;
3576
            case 2: gen_op_neon_rshl_s16(); break;
3577
            case 3: gen_op_neon_rshl_u16(); break;
3578
            case 4: gen_op_neon_rshl_s32(); break;
3579
            case 5: gen_op_neon_rshl_u32(); break;
3580
#if 0
3581
            /* ??? Implementing these is tricky because the vector ops work
3582
               on 32-bit pieces.  */
3583
            case 6: gen_op_neon_rshl_s64(); break;
3584
            case 7: gen_op_neon_rshl_u64(); break;
3585
#else
3586
            case 6: case 7: cpu_abort(env, "VRSHL.64 not implemented");
3587
#endif
3588
            }
3589
            break;
3590
        case 11: /* VQRSHL */
3591
            switch ((size << 1) | u) {
3592
            case 0: gen_op_neon_qrshl_s8(); break;
3593
            case 1: gen_op_neon_qrshl_u8(); break;
3594
            case 2: gen_op_neon_qrshl_s16(); break;
3595
            case 3: gen_op_neon_qrshl_u16(); break;
3596
            case 4: gen_op_neon_qrshl_s32(); break;
3597
            case 5: gen_op_neon_qrshl_u32(); break;
3598
#if 0
3599
            /* ??? Implementing these is tricky because the vector ops work
3600
               on 32-bit pieces.  */
3601
            case 6: gen_op_neon_qrshl_s64(); break;
3602
            case 7: gen_op_neon_qrshl_u64(); break;
3603
#else
3604
            case 6: case 7: cpu_abort(env, "VQRSHL.64 not implemented");
3605
#endif
3606
            }
3607
            break;
3608
        case 12: /* VMAX */
3609
            GEN_NEON_INTEGER_OP(max);
3610
            break;
3611
        case 13: /* VMIN */
3612
            GEN_NEON_INTEGER_OP(min);
3613
            break;
3614
        case 14: /* VABD */
3615
            GEN_NEON_INTEGER_OP(abd);
3616
            break;
3617
        case 15: /* VABA */
3618
            GEN_NEON_INTEGER_OP(abd);
3619
            NEON_GET_REG(T1, rd, pass);
3620
            gen_neon_add(size);
3621
            break;
3622
        case 16:
3623
            if (!u) { /* VADD */
3624
                if (gen_neon_add(size))
3625
                    return 1;
3626
            } else { /* VSUB */
3627
                switch (size) {
3628
                case 0: gen_op_neon_sub_u8(); break;
3629
                case 1: gen_op_neon_sub_u16(); break;
3630
                case 2: gen_op_subl_T0_T1(); break;
3631
                default: return 1;
3632
                }
3633
            }
3634
            break;
3635
        case 17:
3636
            if (!u) { /* VTST */
3637
                switch (size) {
3638
                case 0: gen_op_neon_tst_u8(); break;
3639
                case 1: gen_op_neon_tst_u16(); break;
3640
                case 2: gen_op_neon_tst_u32(); break;
3641
                default: return 1;
3642
                }
3643
            } else { /* VCEQ */
3644
                switch (size) {
3645
                case 0: gen_op_neon_ceq_u8(); break;
3646
                case 1: gen_op_neon_ceq_u16(); break;
3647
                case 2: gen_op_neon_ceq_u32(); break;
3648
                default: return 1;
3649
                }
3650
            }
3651
            break;
3652
        case 18: /* Multiply.  */
3653
            switch (size) {
3654
            case 0: gen_op_neon_mul_u8(); break;
3655
            case 1: gen_op_neon_mul_u16(); break;
3656
            case 2: gen_op_mul_T0_T1(); break;
3657
            default: return 1;
3658
            }
3659
            NEON_GET_REG(T1, rd, pass);
3660
            if (u) { /* VMLS */
3661
                switch (size) {
3662
                case 0: gen_op_neon_rsb_u8(); break;
3663
                case 1: gen_op_neon_rsb_u16(); break;
3664
                case 2: gen_op_rsbl_T0_T1(); break;
3665
                default: return 1;
3666
                }
3667
            } else { /* VMLA */
3668
                gen_neon_add(size);
3669
            }
3670
            break;
3671
        case 19: /* VMUL */
3672
            if (u) { /* polynomial */
3673
                gen_op_neon_mul_p8();
3674
            } else { /* Integer */
3675
                switch (size) {
3676
                case 0: gen_op_neon_mul_u8(); break;
3677
                case 1: gen_op_neon_mul_u16(); break;
3678
                case 2: gen_op_mul_T0_T1(); break;
3679
                default: return 1;
3680
                }
3681
            }
3682
            break;
3683
        case 20: /* VPMAX */
3684
            GEN_NEON_INTEGER_OP(pmax);
3685
            break;
3686
        case 21: /* VPMIN */
3687
            GEN_NEON_INTEGER_OP(pmin);
3688
            break;
3689
        case 22: /* Multiply high.  */
3690
            if (!u) { /* VQDMULH */
3691
                switch (size) {
3692
                case 1: gen_op_neon_qdmulh_s16(); break;
3693
                case 2: gen_op_neon_qdmulh_s32(); break;
3694
                default: return 1;
3695
                }
3696
            } else { /* VQRDMULH */
3697
                switch (size) {
3698
                case 1: gen_op_neon_qrdmulh_s16(); break;
3699
                case 2: gen_op_neon_qrdmulh_s32(); break;
3700
                default: return 1;
3701
                }
3702
            }
3703
            break;
3704
        case 23: /* VPADD */
3705
            if (u)
3706
                return 1;
3707
            switch (size) {
3708
            case 0: gen_op_neon_padd_u8(); break;
3709
            case 1: gen_op_neon_padd_u16(); break;
3710
            case 2: gen_op_addl_T0_T1(); break;
3711
            default: return 1;
3712
            }
3713
            break;
3714
        case 26: /* Floating point arithmetic.  */
3715
            switch ((u << 2) | size) {
3716
            case 0: /* VADD */
3717
                gen_op_neon_add_f32();
3718
                break;
3719
            case 2: /* VSUB */
3720
                gen_op_neon_sub_f32();
3721
                break;
3722
            case 4: /* VPADD */
3723
                gen_op_neon_add_f32();
3724
                break;
3725
            case 6: /* VABD */
3726
                gen_op_neon_abd_f32();
3727
                break;
3728
            default:
3729
                return 1;
3730
            }
3731
            break;
3732
        case 27: /* Float multiply.  */
3733
            gen_op_neon_mul_f32();
3734
            if (!u) {
3735
                NEON_GET_REG(T1, rd, pass);
3736
                if (size == 0) {
3737
                    gen_op_neon_add_f32();
3738
                } else {
3739
                    gen_op_neon_rsb_f32();
3740
                }
3741
            }
3742
            break;
3743
        case 28: /* Float compare.  */
3744
            if (!u) {
3745
                gen_op_neon_ceq_f32();
3746
            } else {
3747
                if (size == 0)
3748
                    gen_op_neon_cge_f32();
3749
                else
3750
                    gen_op_neon_cgt_f32();
3751
            }
3752
            break;
3753
        case 29: /* Float compare absolute.  */
3754
            if (!u)
3755
                return 1;
3756
            if (size == 0)
3757
                gen_op_neon_acge_f32();
3758
            else
3759
                gen_op_neon_acgt_f32();
3760
            break;
3761
        case 30: /* Float min/max.  */
3762
            if (size == 0)
3763
                gen_op_neon_max_f32();
3764
            else
3765
                gen_op_neon_min_f32();
3766
            break;
3767
        case 31:
3768
            if (size == 0)
3769
                gen_op_neon_recps_f32();
3770
            else
3771
                gen_op_neon_rsqrts_f32();
3772
            break;
3773
        default:
3774
            abort();
3775
        }
3776
        /* Save the result.  For elementwise operations we can put it
3777
           straight into the destination register.  For pairwise operations
3778
           we have to be careful to avoid clobbering the source operands.  */
3779
        if (pairwise && rd == rm) {
3780
            gen_neon_movl_scratch_T0(pass);
3781
        } else {
3782
            NEON_SET_REG(T0, rd, pass);
3783
        }
3784

    
3785
        } /* for pass */
3786
        if (pairwise && rd == rm) {
3787
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
3788
                gen_neon_movl_T0_scratch(pass);
3789
                NEON_SET_REG(T0, rd, pass);
3790
            }
3791
        }
3792
    } else if (insn & (1 << 4)) {
3793
        if ((insn & 0x00380080) != 0) {
3794
            /* Two registers and shift.  */
3795
            op = (insn >> 8) & 0xf;
3796
            if (insn & (1 << 7)) {
3797
                /* 64-bit shift.   */
3798
                size = 3;
3799
            } else {
3800
                size = 2;
3801
                while ((insn & (1 << (size + 19))) == 0)
3802
                    size--;
3803
            }
3804
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
3805
            /* To avoid excessive duplication of ops we implement shift
3806
               by immediate using the variable shift operations.  */
3807
            if (op < 8) {
3808
                /* Shift by immediate:
3809
                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
3810
                /* Right shifts are encoded as N - shift, where N is the
3811
                   element size in bits.  */
3812
                if (op <= 4)
3813
                    shift = shift - (1 << (size + 3));
3814
                else
3815
                    shift++;
3816
                if (size == 3) {
3817
                    count = q + 1;
3818
                } else {
3819
                    count = q ? 4: 2;
3820
                }
3821
                switch (size) {
3822
                case 0:
3823
                    imm = (uint8_t) shift;
3824
                    imm |= imm << 8;
3825
                    imm |= imm << 16;
3826
                    break;
3827
                case 1:
3828
                    imm = (uint16_t) shift;
3829
                    imm |= imm << 16;
3830
                    break;
3831
                case 2:
3832
                case 3:
3833
                    imm = shift;
3834
                    break;
3835
                default:
3836
                    abort();
3837
                }
3838

    
3839
                for (pass = 0; pass < count; pass++) {
3840
                    if (size < 3) {
3841
                        /* Operands in T0 and T1.  */
3842
                        gen_op_movl_T1_im(imm);
3843
                        NEON_GET_REG(T0, rm, pass);
3844
                    } else {
3845
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
3846
                        gen_op_movl_T0_im(imm);
3847
                        gen_neon_movl_scratch_T0(0);
3848
                        gen_op_movl_T0_im((int32_t)imm >> 31);
3849
                        gen_neon_movl_scratch_T0(1);
3850
                        NEON_GET_REG(T0, rm, pass * 2);
3851
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
3852
                    }
3853

    
3854
                    if (gen_neon_shift_im[op][u][size] == NULL)
3855
                        return 1;
3856
                    gen_neon_shift_im[op][u][size]();
3857

    
3858
                    if (op == 1 || op == 3) {
3859
                        /* Accumulate.  */
3860
                        if (size == 3) {
3861
                            gen_neon_movl_scratch_T0(0);
3862
                            gen_neon_movl_scratch_T1(1);
3863
                            NEON_GET_REG(T0, rd, pass * 2);
3864
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
3865
                            gen_op_neon_addl_u64();
3866
                        } else {
3867
                            NEON_GET_REG(T1, rd, pass);
3868
                            gen_neon_add(size);
3869
                        }
3870
                    } else if (op == 4 || (op == 5 && u)) {
3871
                        /* Insert */
3872
                        if (size == 3) {
3873
                            cpu_abort(env, "VS[LR]I.64 not implemented");
3874
                        }
3875
                        switch (size) {
3876
                        case 0:
3877
                            if (op == 4)
3878
                                imm = 0xff >> -shift;
3879
                            else
3880
                                imm = (uint8_t)(0xff << shift);
3881
                            imm |= imm << 8;
3882
                            imm |= imm << 16;
3883
                            break;
3884
                        case 1:
3885
                            if (op == 4)
3886
                                imm = 0xffff >> -shift;
3887
                            else
3888
                                imm = (uint16_t)(0xffff << shift);
3889
                            imm |= imm << 16;
3890
                            break;
3891
                        case 2:
3892
                            if (op == 4)
3893
                                imm = 0xffffffffu >> -shift;
3894
                            else
3895
                                imm = 0xffffffffu << shift;
3896
                            break;
3897
                        default:
3898
                            abort();
3899
                        }
3900
                        NEON_GET_REG(T1, rd, pass);
3901
                        gen_op_movl_T2_im(imm);
3902
                        gen_op_neon_bsl();
3903
                    }
3904
                    if (size == 3) {
3905
                        NEON_SET_REG(T0, rd, pass * 2);
3906
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
3907
                    } else {
3908
                        NEON_SET_REG(T0, rd, pass);
3909
                    }
3910
                } /* for pass */
3911
            } else if (op < 10) {
3912
                /* Shift by immediate and narrow:
3913
                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
3914
                shift = shift - (1 << (size + 3));
3915
                size++;
3916
                if (size == 3) {
3917
                    count = q + 1;
3918
                } else {
3919
                    count = q ? 4: 2;
3920
                }
3921
                switch (size) {
3922
                case 1:
3923
                    imm = (uint16_t) shift;
3924
                    imm |= imm << 16;
3925
                    break;
3926
                case 2:
3927
                case 3:
3928
                    imm = shift;
3929
                    break;
3930
                default:
3931
                    abort();
3932
                }
3933

    
3934
                /* Processing MSB first means we need to do less shuffling at
3935
                   the end.  */
3936
                for (pass =  count - 1; pass >= 0; pass--) {
3937
                    /* Avoid clobbering the second operand before it has been
3938
                       written.  */
3939
                    n = pass;
3940
                    if (rd == rm)
3941
                        n ^= (count - 1);
3942
                    else
3943
                        n = pass;
3944

    
3945
                    if (size < 3) {
3946
                        /* Operands in T0 and T1.  */
3947
                        gen_op_movl_T1_im(imm);
3948
                        NEON_GET_REG(T0, rm, n);
3949
                    } else {
3950
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
3951
                        gen_op_movl_T0_im(imm);
3952
                        gen_neon_movl_scratch_T0(0);
3953
                        gen_op_movl_T0_im((int32_t)imm >> 31);
3954
                        gen_neon_movl_scratch_T0(1);
3955
                        NEON_GET_REG(T0, rm, n * 2);
3956
                        NEON_GET_REG(T0, rm, n * 2 + 1);
3957
                    }
3958

    
3959
                    gen_neon_shift_im_narrow[q][u][size - 1]();
3960

    
3961
                    if (size < 3 && (pass & 1) == 0) {
3962
                        gen_neon_movl_scratch_T0(0);
3963
                    } else {
3964
                        uint32_t offset;
3965

    
3966
                        if (size < 3)
3967
                            gen_neon_movl_T1_scratch(0);
3968

    
3969
                        if (op == 8 && !u) {
3970
                            gen_neon_narrow[size - 1]();
3971
                        } else {
3972
                            if (op == 8)
3973
                                gen_neon_narrow_sats[size - 2]();
3974
                            else
3975
                                gen_neon_narrow_satu[size - 1]();
3976
                        }
3977
                        if (size == 3)
3978
                            offset = neon_reg_offset(rd, n);
3979
                        else
3980
                            offset = neon_reg_offset(rd, n >> 1);
3981
                        gen_op_neon_setreg_T0(offset);
3982
                    }
3983
                } /* for pass */
3984
            } else if (op == 10) {
3985
                /* VSHLL */
3986
                if (q)
3987
                    return 1;
3988
                for (pass = 0; pass < 2; pass++) {
3989
                    /* Avoid clobbering the input operand.  */
3990
                    if (rd == rm)
3991
                        n = 1 - pass;
3992
                    else
3993
                        n = pass;
3994

    
3995
                    NEON_GET_REG(T0, rm, n);
3996
                    GEN_NEON_INTEGER_OP(widen);
3997
                    if (shift != 0) {
3998
                        /* The shift is less than the width of the source
3999
                           type, so in some cases we can just
4000
                           shift the whole register.  */
4001
                        if (size == 1 || (size == 0 && u)) {
4002
                            gen_op_shll_T0_im(shift);
4003
                            gen_op_shll_T1_im(shift);
4004
                        } else {
4005
                            switch (size) {
4006
                            case 0: gen_op_neon_shll_u16(shift); break;
4007
                            case 2: gen_op_neon_shll_u64(shift); break;
4008
                            default: abort();
4009
                            }
4010
                        }
4011
                    }
4012
                    NEON_SET_REG(T0, rd, n * 2);
4013
                    NEON_SET_REG(T1, rd, n * 2 + 1);
4014
                }
4015
            } else if (op == 15 || op == 16) {
4016
                /* VCVT fixed-point.  */
4017
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
4018
                    gen_op_vfp_getreg_F0s(neon_reg_offset(rm, pass));
4019
                    if (op & 1) {
4020
                        if (u)
4021
                            gen_op_vfp_ultos(shift);
4022
                        else
4023
                            gen_op_vfp_sltos(shift);
4024
                    } else {
4025
                        if (u)
4026
                            gen_op_vfp_touls(shift);
4027
                        else
4028
                            gen_op_vfp_tosls(shift);
4029
                    }
4030
                    gen_op_vfp_setreg_F0s(neon_reg_offset(rd, pass));
4031
                }
4032
            } else {
4033
                return 1;
4034
            }
4035
        } else { /* (insn & 0x00380080) == 0 */
4036
            int invert;
4037

    
4038
            op = (insn >> 8) & 0xf;
4039
            /* One register and immediate.  */
4040
            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
4041
            invert = (insn & (1 << 5)) != 0;
4042
            switch (op) {
4043
            case 0: case 1:
4044
                /* no-op */
4045
                break;
4046
            case 2: case 3:
4047
                imm <<= 8;
4048
                break;
4049
            case 4: case 5:
4050
                imm <<= 16;
4051
                break;
4052
            case 6: case 7:
4053
                imm <<= 24;
4054
                break;
4055
            case 8: case 9:
4056
                imm |= imm << 16;
4057
                break;
4058
            case 10: case 11:
4059
                imm = (imm << 8) | (imm << 24);
4060
                break;
4061
            case 12:
4062
                imm = (imm < 8) | 0xff;
4063
                break;
4064
            case 13:
4065
                imm = (imm << 16) | 0xffff;
4066
                break;
4067
            case 14:
4068
                imm |= (imm << 8) | (imm << 16) | (imm << 24);
4069
                if (invert)
4070
                    imm = ~imm;
4071
                break;
4072
            case 15:
4073
                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
4074
                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
4075
                break;
4076
            }
4077
            if (invert)
4078
                imm = ~imm;
4079

    
4080
            if (op != 14 || !invert)
4081
                gen_op_movl_T1_im(imm);
4082

    
4083
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
4084
                if (op & 1 && op < 12) {
4085
                    NEON_GET_REG(T0, rd, pass);
4086
                    if (invert) {
4087
                        /* The immediate value has already been inverted, so
4088
                           BIC becomes AND.  */
4089
                        gen_op_andl_T0_T1();
4090
                    } else {
4091
                        gen_op_orl_T0_T1();
4092
                    }
4093
                    NEON_SET_REG(T0, rd, pass);
4094
                } else {
4095
                    if (op == 14 && invert) {
4096
                        uint32_t tmp;
4097
                        tmp = 0;
4098
                        for (n = 0; n < 4; n++) {
4099
                            if (imm & (1 << (n + (pass & 1) * 4)))
4100
                                tmp |= 0xff << (n * 8);
4101
                        }
4102
                        gen_op_movl_T1_im(tmp);
4103
                    }
4104
                    /* VMOV, VMVN.  */
4105
                    NEON_SET_REG(T1, rd, pass);
4106
                }
4107
            }
4108
        }
4109
    } else { /* (insn & 0x00800010 == 0x00800010) */
4110
        if (size != 3) {
4111
            op = (insn >> 8) & 0xf;
4112
            if ((insn & (1 << 6)) == 0) {
4113
                /* Three registers of different lengths.  */
4114
                int src1_wide;
4115
                int src2_wide;
4116
                int prewiden;
4117
                /* prewiden, src1_wide, src2_wide */
4118
                static const int neon_3reg_wide[16][3] = {
4119
                    {1, 0, 0}, /* VADDL */
4120
                    {1, 1, 0}, /* VADDW */
4121
                    {1, 0, 0}, /* VSUBL */
4122
                    {1, 1, 0}, /* VSUBW */
4123
                    {0, 1, 1}, /* VADDHN */
4124
                    {0, 0, 0}, /* VABAL */
4125
                    {0, 1, 1}, /* VSUBHN */
4126
                    {0, 0, 0}, /* VABDL */
4127
                    {0, 0, 0}, /* VMLAL */
4128
                    {0, 0, 0}, /* VQDMLAL */
4129
                    {0, 0, 0}, /* VMLSL */
4130
                    {0, 0, 0}, /* VQDMLSL */
4131
                    {0, 0, 0}, /* Integer VMULL */
4132
                    {0, 0, 0}, /* VQDMULL */
4133
                    {0, 0, 0}  /* Polynomial VMULL */
4134
                };
4135

    
4136
                prewiden = neon_3reg_wide[op][0];
4137
                src1_wide = neon_3reg_wide[op][1];
4138
                src2_wide = neon_3reg_wide[op][2];
4139

    
4140
                /* Avoid overlapping operands.  Wide source operands are
4141
                   always aligned so will never overlap with wide
4142
                   destinations in problematic ways.  */
4143
                if (rd == rm) {
4144
                    NEON_GET_REG(T2, rm, 1);
4145
                } else if (rd == rn) {
4146
                    NEON_GET_REG(T2, rn, 1);
4147
                }
4148
                for (pass = 0; pass < 2; pass++) {
4149
                    /* Load the second operand into env->vfp.scratch.
4150
                       Also widen narrow operands.  */
4151
                    if (pass == 1 && rd == rm) {
4152
                        if (prewiden) {
4153
                            gen_op_movl_T0_T2();
4154
                        } else {
4155
                            gen_op_movl_T1_T2();
4156
                        }
4157
                    } else {
4158
                        if (src2_wide) {
4159
                            NEON_GET_REG(T0, rm, pass * 2);
4160
                            NEON_GET_REG(T1, rm, pass * 2 + 1);
4161
                        } else {
4162
                            if (prewiden) {
4163
                                NEON_GET_REG(T0, rm, pass);
4164
                            } else {
4165
                                NEON_GET_REG(T1, rm, pass);
4166
                            }
4167
                        }
4168
                    }
4169
                    if (prewiden && !src2_wide) {
4170
                        GEN_NEON_INTEGER_OP(widen);
4171
                    }
4172
                    if (prewiden || src2_wide) {
4173
                        gen_neon_movl_scratch_T0(0);
4174
                        gen_neon_movl_scratch_T1(1);
4175
                    }
4176

    
4177
                    /* Load the first operand.  */
4178
                    if (pass == 1 && rd == rn) {
4179
                        gen_op_movl_T0_T2();
4180
                    } else {
4181
                        if (src1_wide) {
4182
                            NEON_GET_REG(T0, rn, pass * 2);
4183
                            NEON_GET_REG(T1, rn, pass * 2 + 1);
4184
                        } else {
4185
                            NEON_GET_REG(T0, rn, pass);
4186
                        }
4187
                    }
4188
                    if (prewiden && !src1_wide) {
4189
                        GEN_NEON_INTEGER_OP(widen);
4190
                    }
4191
                    switch (op) {
4192
                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
4193
                        switch (size) {
4194
                        case 0: gen_op_neon_addl_u16(); break;
4195
                        case 1: gen_op_neon_addl_u32(); break;
4196
                        case 2: gen_op_neon_addl_u64(); break;
4197
                        default: abort();
4198
                        }
4199
                        break;
4200
                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
4201
                        switch (size) {
4202
                        case 0: gen_op_neon_subl_u16(); break;
4203
                        case 1: gen_op_neon_subl_u32(); break;
4204
                        case 2: gen_op_neon_subl_u64(); break;
4205
                        default: abort();
4206
                        }
4207
                        break;
4208
                    case 5: case 7: /* VABAL, VABDL */
4209
                        switch ((size << 1) | u) {
4210
                        case 0: gen_op_neon_abdl_s16(); break;
4211
                        case 1: gen_op_neon_abdl_u16(); break;
4212
                        case 2: gen_op_neon_abdl_s32(); break;
4213
                        case 3: gen_op_neon_abdl_u32(); break;
4214
                        case 4: gen_op_neon_abdl_s64(); break;
4215
                        case 5: gen_op_neon_abdl_u64(); break;
4216
                        default: abort();
4217
                        }
4218
                        break;
4219
                    case 8: case 9: case 10: case 11: case 12: case 13:
4220
                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
4221
                        switch ((size << 1) | u) {
4222
                        case 0: gen_op_neon_mull_s8(); break;
4223
                        case 1: gen_op_neon_mull_u8(); break;
4224
                        case 2: gen_op_neon_mull_s16(); break;
4225
                        case 3: gen_op_neon_mull_u16(); break;
4226
                        case 4: gen_op_imull_T0_T1(); break;
4227
                        case 5: gen_op_mull_T0_T1(); break;
4228
                        default: abort();
4229
                        }
4230
                        break;
4231
                    case 14: /* Polynomial VMULL */
4232
                        cpu_abort(env, "Polynomial VMULL not implemented");
4233

    
4234
                    default: /* 15 is RESERVED.  */
4235
                        return 1;
4236
                    }
4237
                    if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
4238
                        /* Accumulate.  */
4239
                        if (op == 10 || op == 11) {
4240
                            switch (size) {
4241
                            case 0: gen_op_neon_negl_u16(); break;
4242
                            case 1: gen_op_neon_negl_u32(); break;
4243
                            case 2: gen_op_neon_negl_u64(); break;
4244
                            default: abort();
4245
                            }
4246
                        }
4247

    
4248
                        gen_neon_movl_scratch_T0(0);
4249
                        gen_neon_movl_scratch_T1(1);
4250

    
4251
                        if (op != 13) {
4252
                            NEON_GET_REG(T0, rd, pass * 2);
4253
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
4254
                        }
4255

    
4256
                        switch (op) {
4257
                        case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
4258
                            switch (size) {
4259
                            case 0: gen_op_neon_addl_u16(); break;
4260
                            case 1: gen_op_neon_addl_u32(); break;
4261
                            case 2: gen_op_neon_addl_u64(); break;
4262
                            default: abort();
4263
                            }
4264
                            break;
4265
                        case 9: case 11: /* VQDMLAL, VQDMLSL */
4266
                            switch (size) {
4267
                            case 1: gen_op_neon_addl_saturate_s32(); break;
4268
                            case 2: gen_op_neon_addl_saturate_s64(); break;
4269
                            default: abort();
4270
                            }
4271
                            /* Fall through.  */
4272
                        case 13: /* VQDMULL */
4273
                            switch (size) {
4274
                            case 1: gen_op_neon_addl_saturate_s32(); break;
4275
                            case 2: gen_op_neon_addl_saturate_s64(); break;
4276
                            default: abort();
4277
                            }
4278
                            break;
4279
                        default:
4280
                            abort();
4281
                        }
4282
                        NEON_SET_REG(T0, rd, pass * 2);
4283
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
4284
                    } else if (op == 4 || op == 6) {
4285
                        /* Narrowing operation.  */
4286
                        if (u) {
4287
                            switch (size) {
4288
                            case 0: gen_op_neon_narrow_high_u8(); break;
4289
                            case 1: gen_op_neon_narrow_high_u16(); break;
4290
                            case 2: gen_op_movl_T0_T1(); break;
4291
                            default: abort();
4292
                            }
4293
                        } else {
4294
                            switch (size) {
4295
                            case 0: gen_op_neon_narrow_high_round_u8(); break;
4296
                            case 1: gen_op_neon_narrow_high_round_u16(); break;
4297
                            case 2: gen_op_neon_narrow_high_round_u32(); break;
4298
                            default: abort();
4299
                            }
4300
                        }
4301
                        NEON_SET_REG(T0, rd, pass);
4302
                    } else {
4303
                        /* Write back the result.  */
4304
                        NEON_SET_REG(T0, rd, pass * 2);
4305
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
4306
                    }
4307
                }
4308
            } else {
4309
                /* Two registers and a scalar.  */
4310
                switch (op) {
4311
                case 0: /* Integer VMLA scalar */
4312
                case 1: /* Float VMLA scalar */
4313
                case 4: /* Integer VMLS scalar */
4314
                case 5: /* Floating point VMLS scalar */
4315
                case 8: /* Integer VMUL scalar */
4316
                case 9: /* Floating point VMUL scalar */
4317
                case 12: /* VQDMULH scalar */
4318
                case 13: /* VQRDMULH scalar */
4319
                    gen_neon_get_scalar(size, rm);
4320
                    gen_op_movl_T2_T0();
4321
                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
4322
                        if (pass != 0)
4323
                            gen_op_movl_T0_T2();
4324
                        NEON_GET_REG(T1, rn, pass);
4325
                        if (op == 12) {
4326
                            if (size == 1) {
4327
                                gen_op_neon_qdmulh_s16();
4328
                            } else {
4329
                                gen_op_neon_qdmulh_s32();
4330
                            }
4331
                        } else if (op == 13) {
4332
                            if (size == 1) {
4333
                                gen_op_neon_qrdmulh_s16();
4334
                            } else {
4335
                                gen_op_neon_qrdmulh_s32();
4336
                            }
4337
                        } else if (op & 1) {
4338
                            gen_op_neon_mul_f32();
4339
                        } else {
4340
                            switch (size) {
4341
                            case 0: gen_op_neon_mul_u8(); break;
4342
                            case 1: gen_op_neon_mul_u16(); break;
4343
                            case 2: gen_op_mul_T0_T1(); break;
4344
                            default: return 1;
4345
                            }
4346
                        }
4347
                        if (op < 8) {
4348
                            /* Accumulate.  */
4349
                            NEON_GET_REG(T1, rd, pass);
4350
                            switch (op) {
4351
                            case 0:
4352
                                gen_neon_add(size);
4353
                                break;
4354
                            case 1:
4355
                                gen_op_neon_add_f32();
4356
                                break;
4357
                            case 4:
4358
                                switch (size) {
4359
                                case 0: gen_op_neon_rsb_u8(); break;
4360
                                case 1: gen_op_neon_rsb_u16(); break;
4361
                                case 2: gen_op_rsbl_T0_T1(); break;
4362
                                default: return 1;
4363
                                }
4364
                                break;
4365
                            case 5:
4366
                                gen_op_neon_rsb_f32();
4367
                                break;
4368
                            default:
4369
                                abort();
4370
                            }
4371
                        }
4372
                        NEON_SET_REG(T0, rd, pass);
4373
                    }
4374
                    break;
4375
                case 2: /* VMLAL scalar */
4376
                case 3: /* VQDMLAL scalar */
4377
                case 6: /* VMLSL scalar */
4378
                case 7: /* VQDMLSL scalar */
4379
                case 10: /* VMULL scalar */
4380
                case 11: /* VQDMULL scalar */
4381
                    if (rd == rn) {
4382
                        /* Save overlapping operands before they are
4383
                           clobbered.  */
4384
                        NEON_GET_REG(T0, rn, 1);
4385
                        gen_neon_movl_scratch_T0(2);
4386
                    }
4387
                    gen_neon_get_scalar(size, rm);
4388
                    gen_op_movl_T2_T0();
4389
                    for (pass = 0; pass < 2; pass++) {
4390
                        if (pass != 0) {
4391
                            gen_op_movl_T0_T2();
4392
                        }
4393
                        if (pass != 0 && rd == rn) {
4394
                            gen_neon_movl_T1_scratch(2);
4395
                        } else {
4396
                            NEON_GET_REG(T1, rn, pass);
4397
                        }
4398
                        switch ((size << 1) | u) {
4399
                        case 0: gen_op_neon_mull_s8(); break;
4400
                        case 1: gen_op_neon_mull_u8(); break;
4401
                        case 2: gen_op_neon_mull_s16(); break;
4402
                        case 3: gen_op_neon_mull_u16(); break;
4403
                        case 4: gen_op_imull_T0_T1(); break;
4404
                        case 5: gen_op_mull_T0_T1(); break;
4405
                        default: abort();
4406
                        }
4407
                        if (op == 6 || op == 7) {
4408
                            switch (size) {
4409
                            case 0: gen_op_neon_negl_u16(); break;
4410
                            case 1: gen_op_neon_negl_u32(); break;
4411
                            case 2: gen_op_neon_negl_u64(); break;
4412
                            default: abort();
4413
                            }
4414
                        }
4415
                        gen_neon_movl_scratch_T0(0);
4416
                        gen_neon_movl_scratch_T1(1);
4417
                        NEON_GET_REG(T0, rd, pass * 2);
4418
                        NEON_GET_REG(T1, rd, pass * 2 + 1);
4419
                        switch (op) {
4420
                        case 2: case 6:
4421
                            switch (size) {
4422
                            case 0: gen_op_neon_addl_u16(); break;
4423
                            case 1: gen_op_neon_addl_u32(); break;
4424
                            case 2: gen_op_neon_addl_u64(); break;
4425
                            default: abort();
4426
                            }
4427
                            break;
4428
                        case 3: case 7:
4429
                            switch (size) {
4430
                            case 1:
4431
                                gen_op_neon_addl_saturate_s32();
4432
                                gen_op_neon_addl_saturate_s32();
4433
                                break;
4434
                            case 2:
4435
                                gen_op_neon_addl_saturate_s64();
4436
                                gen_op_neon_addl_saturate_s64();
4437
                                break;
4438
                            default: abort();
4439
                            }
4440
                            break;
4441
                        case 10:
4442
                            /* no-op */
4443
                            break;
4444
                        case 11:
4445
                            switch (size) {
4446
                            case 1: gen_op_neon_addl_saturate_s32(); break;
4447
                            case 2: gen_op_neon_addl_saturate_s64(); break;
4448
                            default: abort();
4449
                            }
4450
                            break;
4451
                        default:
4452
                            abort();
4453
                        }
4454
                        NEON_SET_REG(T0, rd, pass * 2);
4455
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
4456
                    }
4457
                    break;
4458
                default: /* 14 and 15 are RESERVED */
4459
                    return 1;
4460
                }
4461
            }
4462
        } else { /* size == 3 */
4463
            if (!u) {
4464
                /* Extract.  */
4465
                int reg;
4466
                imm = (insn >> 8) & 0xf;
4467
                reg = rn;
4468
                count = q ? 4 : 2;
4469
                n = imm >> 2;
4470
                NEON_GET_REG(T0, reg, n);
4471
                for (pass = 0; pass < count; pass++) {
4472
                    n++;
4473
                    if (n > count) {
4474
                        reg = rm;
4475
                        n -= count;
4476
                    }
4477
                    if (imm & 3) {
4478
                        NEON_GET_REG(T1, reg, n);
4479
                        gen_op_neon_extract((insn << 3) & 0x1f);
4480
                    }
4481
                    /* ??? This is broken if rd and rm overlap */
4482
                    NEON_SET_REG(T0, rd, pass);
4483
                    if (imm & 3) {
4484
                        gen_op_movl_T0_T1();
4485
                    } else {
4486
                        NEON_GET_REG(T0, reg, n);
4487
                    }
4488
                }
4489
            } else if ((insn & (1 << 11)) == 0) {
4490
                /* Two register misc.  */
4491
                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
4492
                size = (insn >> 18) & 3;
4493
                switch (op) {
4494
                case 0: /* VREV64 */
4495
                    if (size == 3)
4496
                        return 1;
4497
                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
4498
                        NEON_GET_REG(T0, rm, pass * 2);
4499
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
4500
                        switch (size) {
4501
                        case 0: gen_op_rev_T0(); break;
4502
                        case 1: gen_swap_half(cpu_T[0]); break;
4503
                        case 2: /* no-op */ break;
4504
                        default: abort();
4505
                        }
4506
                        NEON_SET_REG(T0, rd, pass * 2 + 1);
4507
                        if (size == 2) {
4508
                            NEON_SET_REG(T1, rd, pass * 2);
4509
                        } else {
4510
                            gen_op_movl_T0_T1();
4511
                            switch (size) {
4512
                            case 0: gen_op_rev_T0(); break;
4513
                            case 1: gen_swap_half(cpu_T[0]); break;
4514
                            default: abort();
4515
                            }
4516
                            NEON_SET_REG(T0, rd, pass * 2);
4517
                        }
4518
                    }
4519
                    break;
4520
                case 4: case 5: /* VPADDL */
4521
                case 12: case 13: /* VPADAL */
4522
                    if (size < 2)
4523
                        goto elementwise;
4524
                    if (size == 3)
4525
                        return 1;
4526
                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
4527
                        NEON_GET_REG(T0, rm, pass * 2);
4528
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
4529
                        if (op & 1)
4530
                            gen_op_neon_paddl_u32();
4531
                        else
4532
                            gen_op_neon_paddl_s32();
4533
                        if (op >= 12) {
4534
                            /* Accumulate.  */
4535
                            gen_neon_movl_scratch_T0(0);
4536
                            gen_neon_movl_scratch_T1(1);
4537

    
4538
                            NEON_GET_REG(T0, rd, pass * 2);
4539
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
4540
                            gen_op_neon_addl_u64();
4541
                        }
4542
                        NEON_SET_REG(T0, rd, pass * 2);
4543
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
4544
                    }
4545
                    break;
4546
                case 33: /* VTRN */
4547
                    if (size == 2) {
4548
                        for (n = 0; n < (q ? 4 : 2); n += 2) {
4549
                            NEON_GET_REG(T0, rm, n);
4550
                            NEON_GET_REG(T1, rd, n + 1);
4551
                            NEON_SET_REG(T1, rm, n);
4552
                            NEON_SET_REG(T0, rd, n + 1);
4553
                        }
4554
                    } else {
4555
                        goto elementwise;
4556
                    }
4557
                    break;
4558
                case 34: /* VUZP */
4559
                    /* Reg  Before       After
4560
                       Rd   A3 A2 A1 A0  B2 B0 A2 A0
4561
                       Rm   B3 B2 B1 B0  B3 B1 A3 A1
4562
                     */
4563
                    if (size == 3)
4564
                        return 1;
4565
                    gen_neon_unzip(rd, q, 0, size);
4566
                    gen_neon_unzip(rm, q, 4, size);
4567
                    if (q) {
4568
                        static int unzip_order_q[8] =
4569
                            {0, 2, 4, 6, 1, 3, 5, 7};
4570
                        for (n = 0; n < 8; n++) {
4571
                            int reg = (n < 4) ? rd : rm;
4572
                            gen_neon_movl_T0_scratch(unzip_order_q[n]);
4573
                            NEON_SET_REG(T0, reg, n % 4);
4574
                        }
4575
                    } else {
4576
                        static int unzip_order[4] =
4577
                            {0, 4, 1, 5};
4578
                        for (n = 0; n < 4; n++) {
4579
                            int reg = (n < 2) ? rd : rm;
4580
                            gen_neon_movl_T0_scratch(unzip_order[n]);
4581
                            NEON_SET_REG(T0, reg, n % 2);
4582
                        }
4583
                    }
4584
                    break;
4585
                case 35: /* VZIP */
4586
                    /* Reg  Before       After
4587
                       Rd   A3 A2 A1 A0  B1 A1 B0 A0
4588
                       Rm   B3 B2 B1 B0  B3 A3 B2 A2
4589
                     */
4590
                    if (size == 3)
4591
                        return 1;
4592
                    count = (q ? 4 : 2);
4593
                    for (n = 0; n < count; n++) {
4594
                        NEON_GET_REG(T0, rd, n);
4595
                        NEON_GET_REG(T1, rd, n);
4596
                        switch (size) {
4597
                        case 0: gen_op_neon_zip_u8(); break;
4598
                        case 1: gen_op_neon_zip_u16(); break;
4599
                        case 2: /* no-op */; break;
4600
                        default: abort();
4601
                        }
4602
                        gen_neon_movl_scratch_T0(n * 2);
4603
                        gen_neon_movl_scratch_T1(n * 2 + 1);
4604
                    }
4605
                    for (n = 0; n < count * 2; n++) {
4606
                        int reg = (n < count) ? rd : rm;
4607
                        gen_neon_movl_T0_scratch(n);
4608
                        NEON_SET_REG(T0, reg, n % count);
4609
                    }
4610
                    break;
4611
                case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
4612
                    for (pass = 0; pass < 2; pass++) {
4613
                        if (rd == rm + 1) {
4614
                            n = 1 - pass;
4615
                        } else {
4616
                            n = pass;
4617
                        }
4618
                        NEON_GET_REG(T0, rm, n * 2);
4619
                        NEON_GET_REG(T1, rm, n * 2 + 1);
4620
                        if (op == 36 && q == 0) {
4621
                            switch (size) {
4622
                            case 0: gen_op_neon_narrow_u8(); break;
4623
                            case 1: gen_op_neon_narrow_u16(); break;
4624
                            case 2: /* no-op */ break;
4625
                            default: return 1;
4626
                            }
4627
                        } else if (q) {
4628
                            switch (size) {
4629
                            case 0: gen_op_neon_narrow_sat_u8(); break;
4630
                            case 1: gen_op_neon_narrow_sat_u16(); break;
4631
                            case 2: gen_op_neon_narrow_sat_u32(); break;
4632
                            default: return 1;
4633
                            }
4634
                        } else {
4635
                            switch (size) {
4636
                            case 0: gen_op_neon_narrow_sat_s8(); break;
4637
                            case 1: gen_op_neon_narrow_sat_s16(); break;
4638
                            case 2: gen_op_neon_narrow_sat_s32(); break;
4639
                            default: return 1;
4640
                            }
4641
                        }
4642
                        NEON_SET_REG(T0, rd, n);
4643
                    }
4644
                    break;
4645
                case 38: /* VSHLL */
4646
                    if (q)
4647
                        return 1;
4648
                    if (rm == rd) {
4649
                        NEON_GET_REG(T2, rm, 1);
4650
                    }
4651
                    for (pass = 0; pass < 2; pass++) {
4652
                        if (pass == 1 && rm == rd) {
4653
                            gen_op_movl_T0_T2();
4654
                        } else {
4655
                            NEON_GET_REG(T0, rm, pass);
4656
                        }
4657
                        switch (size) {
4658
                        case 0: gen_op_neon_widen_high_u8(); break;
4659
                        case 1: gen_op_neon_widen_high_u16(); break;
4660
                        case 2:
4661
                            gen_op_movl_T1_T0();
4662
                            gen_op_movl_T0_im(0);
4663
                            break;
4664
                        default: return 1;
4665
                        }
4666
                        NEON_SET_REG(T0, rd, pass * 2);
4667
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
4668
                    }
4669
                    break;
4670
                default:
4671
                elementwise:
4672
                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
4673
                        if (op == 30 || op == 31 || op >= 58) {
4674
                            gen_op_vfp_getreg_F0s(neon_reg_offset(rm, pass));
4675
                        } else {
4676
                            NEON_GET_REG(T0, rm, pass);
4677
                        }
4678
                        switch (op) {
4679
                        case 1: /* VREV32 */
4680
                            switch (size) {
4681
                            case 0: gen_op_rev_T0(); break;
4682
                            case 1: gen_swap_half(cpu_T[0]); break;
4683
                            default: return 1;
4684
                            }
4685
                            break;
4686
                        case 2: /* VREV16 */
4687