/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
#include "disas.h"
#include "tcg-op.h"

#define GEN_HELPER 1
#include "helpers.h"

#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_feature(env, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)

#define ARCH(x) if (!ENABLE_ARCH_##x) goto illegal_op;

/* internal defines */
typedef struct DisasContext {
    target_ulong pc;
    int is_jmp;
    /* Nonzero if this instruction has been conditionally skipped.  */
    int condjmp;
    /* The label that will be jumped to when the instruction is skipped.  */
    int condlabel;
    /* Thumb-2 conditional execution bits.  */
    int condexec_mask;
    int condexec_cond;
    struct TranslationBlock *tb;
    int singlestep_enabled;
    int thumb;
    int is_mem;
#if !defined(CONFIG_USER_ONLY)
    int user;
#endif
} DisasContext;

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

/* These instructions trap after executing, so defer them until after the
   conditional execution state has been updated.  */
#define DISAS_WFI 4
#define DISAS_SWI 5

/* XXX: move that elsewhere */
extern FILE *logfile;
extern int loglevel;

static TCGv cpu_env;
/* FIXME:  These should be removed.  */
static TCGv cpu_T[2];
static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");

    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
}

/* The code generator doesn't like lots of temporaries, so maintain our own
   cache for reuse within a function.  */
#define MAX_TEMPS 8
static int num_temps;
static TCGv temps[MAX_TEMPS];

/* Allocate a temporary variable.  */
static TCGv new_tmp(void)
{
    TCGv tmp;
    if (num_temps == MAX_TEMPS)
        abort();

    if (GET_TCGV(temps[num_temps]))
        return temps[num_temps++];

    tmp = tcg_temp_new(TCG_TYPE_I32);
    temps[num_temps++] = tmp;
    return tmp;
}

/* Release a temporary variable.  */
static void dead_tmp(TCGv tmp)
{
    int i;
    num_temps--;
    i = num_temps;
    if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
        return;

    /* Shuffle this temp to the last slot.  */
    while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
        i--;
    while (i < num_temps) {
        temps[i] = temps[i + 1];
        i++;
    }
    temps[i] = tmp;
}
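
/* A released temporary need not be the most recently allocated one:
 * dead_tmp() shuffles the released value to the end of the cache so it
 * is the next one new_tmp() hands out.  Typical usage pairs each
 * new_tmp() with exactly one dead_tmp():
 *
 *     TCGv tmp = new_tmp();
 *     tcg_gen_shri_i32(tmp, var, 8);
 *     ...
 *     dead_tmp(tmp);
 */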

static inline TCGv load_cpu_offset(int offset)
{
    TCGv tmp = new_tmp();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))

static inline void store_cpu_offset(TCGv var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    dead_tmp(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUState, name))

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv load_reg(DisasContext *s, int reg)
{
    TCGv tmp = new_tmp();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv var)
{
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    dead_tmp(var);
}


/* Basic operations.  */
#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)

#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])

#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
#define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])

#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);

#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_andi_i32(var, var, 0xff)
#define gen_uxth(var) tcg_gen_andi_i32(var, var, 0xffff)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)

#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])

#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception(int excp)
{
    TCGv tmp = new_tmp();
    tcg_gen_movi_i32(tmp, excp);
    gen_helper_exception(tmp);
    dead_tmp(tmp);
}

/* Dual 16x16->32 signed multiply: low halfwords into a, high halfwords
   into b.  */
static void gen_smul_dual(TCGv a, TCGv b)
{
    TCGv tmp1 = new_tmp();
    TCGv tmp2 = new_tmp();
    /* Sign extend the low halfwords, not bytes, before multiplying.  */
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    dead_tmp(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    dead_tmp(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff);
    tcg_gen_shli_i32(var, var, 8);
    /* Sign extend from bit 15 (the original low byte's sign bit).  */
    tcg_gen_ext16s_i32(var, var);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv var, int shift, uint32_t mask)
{
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
static void gen_sbfx(TCGv var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}
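
/* The xor/sub pair above is a branch-free sign extension: for a width-bit
 * value x with sign bit s = 1 << (width - 1), (x ^ s) - s propagates the
 * sign bit through the upper bits.  E.g. width == 4, x == 0xe (-2):
 * (0xe ^ 0x8) - 0x8 == 0x6 - 0x8 == 0xfffffffe.
 */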

/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
{
    tcg_gen_andi_i32(val, val, mask);
    tcg_gen_shli_i32(val, val, shift);
    tcg_gen_andi_i32(base, base, ~(mask << shift));
    tcg_gen_or_i32(dest, base, val);
}

/* Round the top 32 bits of a 64-bit value.  */
static void gen_roundqd(TCGv a, TCGv b)
{
    tcg_gen_shri_i32(a, a, 31);
    tcg_gen_add_i32(a, a, b);
}
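
/* Here "a" holds the low word and "b" the high word of the 64-bit value;
 * adding bit 31 of the low word to the high word rounds to nearest, and
 * the rounded result is left in "a". */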

/* FIXME: Most targets have native widening multiplication.
   It would be good to use that instead of a full wide multiply.  */
/* 32x32->64 multiply.  Marks inputs as dead.  */
static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_extu_i32_i64(tmp1, a);
    dead_tmp(a);
    tcg_gen_extu_i32_i64(tmp2, b);
    dead_tmp(b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    return tmp1;
}

static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_ext_i32_i64(tmp1, a);
    dead_tmp(a);
    tcg_gen_ext_i32_i64(tmp2, b);
    dead_tmp(b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    return tmp1;
}

/* Unsigned 32x32->64 multiply.  */
static void gen_op_mull_T0_T1(void)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
    tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
}
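
/* On return T0 holds bits [31:0] of the product and T1 bits [63:32]:
 * the 64-bit result is truncated for the low word, then shifted right
 * by 32 and truncated again for the high word. */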

/* Signed 32x32->64 multiply.  */
static void gen_imull(TCGv a, TCGv b)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_ext_i32_i64(tmp1, a);
    tcg_gen_ext_i32_i64(tmp2, b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(a, tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(b, tmp1);
}
#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv t0, TCGv t1)
{
    TCGv tmp = new_tmp();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    dead_tmp(tmp);
    dead_tmp(t1);
}
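
/* Clearing bit 15 of both operands keeps the low halfword's carry from
 * propagating into the high halfword, so the two halves are added
 * independently; the final xor folds the original bit-15 sum back in. */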

#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 31);
    gen_set_CF(tmp);
    dead_tmp(tmp);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NZF));
}

/* T0 += T1 + CF.  */
static void gen_adc_T0_T1(void)
{
    TCGv tmp;
    gen_op_addl_T0_T1();
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
    dead_tmp(tmp);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp;
    tcg_gen_sub_i32(dest, t0, t1);
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(dest, dest, tmp);
    tcg_gen_subi_i32(dest, dest, 1);
    dead_tmp(tmp);
}
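
/* ARM subtract-with-carry computes t0 - t1 - NOT(C), i.e. the carry flag
 * acts as "not borrow", which is why the expression above is
 * "+ CF - 1" rather than a plain borrow subtraction. */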

#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])

/* FIXME:  Implement this natively.  */
static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
{
    tcg_gen_xori_i32(t0, t1, ~0);
}

/* T0 &= ~T1.  Clobbers T1.  */
/* FIXME: Implement bic natively.  */
static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp = new_tmp();
    tcg_gen_not_i32(tmp, t1);
    tcg_gen_and_i32(dest, t0, tmp);
    dead_tmp(tmp);
}
static inline void gen_op_bicl_T0_T1(void)
{
    gen_op_notl_T1();
    gen_op_andl_T0_T1();
}

/* FIXME:  Implement this natively.  */
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
{
    TCGv tmp;

    if (i == 0)
        return;

    tmp = new_tmp();
    tcg_gen_shri_i32(tmp, t1, i);
    tcg_gen_shli_i32(t1, t1, 32 - i);
    tcg_gen_or_i32(t0, t1, tmp);
    dead_tmp(tmp);
}
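
/* The rotate is composed as (t1 >> i) | (t1 << (32 - i)); the i == 0
 * early return matters because 32 - i would then be an out-of-range
 * shift count for a 32-bit TCG shift. */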

static void shifter_out_im(TCGv var, int shift)
{
    TCGv tmp = new_tmp();
    if (shift == 0) {
        tcg_gen_andi_i32(tmp, var, 1);
    } else {
        tcg_gen_shri_i32(tmp, var, shift);
        if (shift != 31)
            tcg_gen_andi_i32(tmp, tmp, 1);
    }
    gen_set_CF(tmp);
    dead_tmp(tmp);
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(var, var, 31);
                gen_set_CF(var);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rori_i32(var, var, shift);
        } else {
            TCGv tmp = load_cpu_field(CF);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_shli_i32(tmp, tmp, 31);
            tcg_gen_or_i32(var, var, tmp);
            dead_tmp(tmp);
        }
        break;
    }
}
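
/* A shift field of 0 is special-cased per the ARM ARM: LSR #0 encodes
 * LSR #32, ASR #0 encodes ASR #32, and ROR #0 encodes RRX (rotate right
 * by one bit through the carry flag). */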

static inline void gen_arm_shift_reg(TCGv var, int shiftop,
                                     TCGv shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, var, shift); break;
        case 1: gen_helper_shr_cc(var, var, shift); break;
        case 2: gen_helper_sar_cc(var, var, shift); break;
        case 3: gen_helper_ror_cc(var, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0: gen_helper_shl(var, var, shift); break;
        case 1: gen_helper_shr(var, var, shift); break;
        case 2: gen_helper_sar(var, var, shift); break;
        case 3: gen_helper_ror(var, var, shift); break;
        }
    }
    dead_tmp(shift);
}

#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 5:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
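
/* The helper prefixes follow the instruction mnemonics: "s" and "u" are
 * the signed/unsigned modulo forms, which also set the GE flags (hence
 * the extra CPUState.GE pointer argument); "q"/"uq" are the saturating
 * forms and "sh"/"uh" the halving forms, which leave the flags alone. */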

/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 4:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

static void gen_test_cc(int cc, int label)
{
    TCGv tmp;
    TCGv tmp2;
    TCGv zero;
    int inv;

    zero = tcg_const_i32(0);
    switch (cc) {
    case 0: /* eq: Z */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 1: /* ne: !Z */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        break;
    case 2: /* cs: C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        break;
    case 3: /* cc: !C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 4: /* mi: N */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 5: /* pl: !N */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 6: /* vs: V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 7: /* vc: !V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 8: /* hi: C && !Z */
        inv = gen_new_label();
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
    dead_tmp(tmp);
}
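
/* NZF caches the last flag-setting result rather than separate N and Z
 * bits: Z is "NZF == 0" and N is bit 31 of NZF, which is why eq/ne test
 * equality with zero and mi/pl/ge/lt use signed comparisons against
 * zero on the (possibly xor-combined) flag words. */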

const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};
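
/* Indexed by the 4-bit ARM data-processing opcode: entries are 1 for the
 * logical operations, which set N and Z from the result and leave C/V to
 * the shifter, and 0 for the arithmetic operations, whose flags come
 * from the _cc helpers. */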

/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    if (s->thumb != (addr & 1)) {
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
    }
    tcg_gen_movi_i32(tmp, addr & ~1);
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
    dead_tmp(tmp);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv var)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    tcg_gen_andi_i32(tmp, var, 1);
    store_cpu_field(tmp, thumb);
    tcg_gen_andi_i32(var, var, ~1);
    store_cpu_field(var, regs[15]);
}

/* TODO: This should be removed.  Use gen_bx instead.  */
static inline void gen_bx_T0(DisasContext *s)
{
    TCGv tmp = new_tmp();
    tcg_gen_mov_i32(tmp, cpu_T[0]);
    gen_bx(s, tmp);
}

#if defined(CONFIG_USER_ONLY)
#define gen_ldst(name, s) gen_op_##name##_raw()
#else
#define gen_ldst(name, s) do { \
    s->is_mem = 1; \
    if (IS_USER(s)) \
        gen_op_##name##_user(); \
    else \
        gen_op_##name##_kernel(); \
    } while (0)
#endif
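
/* The "index" argument passed to the tcg_gen_qemu_* ops below is the MMU
 * mode used for the access; callers in this file pass IS_USER(s). */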
static inline TCGv gen_ld8s(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld8s(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld8u(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld8u(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld16s(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld16s(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld16u(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld16u(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld32(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld32u(tmp, addr, index);
    return tmp;
}
static inline void gen_st8(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st8(val, addr, index);
    dead_tmp(val);
}
static inline void gen_st16(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st16(val, addr, index);
    dead_tmp(val);
}
static inline void gen_st32(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st32(val, addr, index);
    dead_tmp(val);
}

static inline void gen_movl_T0_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[0], reg);
}

static inline void gen_movl_T1_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[1], reg);
}

static inline void gen_movl_T2_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[2], reg);
}

static inline void gen_set_pc_im(uint32_t val)
{
    TCGv tmp = new_tmp();
    tcg_gen_movi_i32(tmp, val);
    store_cpu_field(tmp, regs[15]);
}

static inline void gen_set_pc_T0(void)
{
    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
}

static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
{
    TCGv tmp;
    if (reg == 15) {
        tmp = new_tmp();
        tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
    } else {
        tmp = cpu_T[t];
    }
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
    if (reg == 15) {
        dead_tmp(tmp);
        s->is_jmp = DISAS_JUMP;
    }
}

static inline void gen_movl_reg_T0(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 0);
}

static inline void gen_movl_reg_T1(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 1);
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    gen_op_movl_T0_im(s->pc);
    gen_movl_reg_T0(s, 15);
    s->is_jmp = DISAS_UPDATE;
}

static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv var)
{
    int val, rm, shift, shiftop;
    TCGv offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}

static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv var)
{
    int val, rm;
    TCGv offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}

#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    if (dp)                                                           \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \
    else                                                              \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
}

#define VFP_OP1i(name)                               \
static inline void gen_vfp_##name(int dp, int arg)  \
{                                                   \
    if (dp)                                         \
        gen_op_vfp_##name##d(arg);                  \
    else                                            \
        gen_op_vfp_##name##s(arg);                  \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

/* Load zero into F1 (not F0, which holds the value being compared).  */
static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

static inline void gen_vfp_uito(int dp)
{
    if (dp)
        gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_sito(int dp)
{
    if (dp)
        gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_toui(int dp)
{
    if (dp)
        gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_touiz(int dp)
{
    if (dp)
        gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_tosi(int dp)
{
    if (dp)
        gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_tosiz(int dp)
{
    if (dp)
        gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
}

#define VFP_GEN_FIX(name) \
static inline void gen_vfp_##name(int dp, int shift) \
{ \
    if (dp) \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\
    else \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\
}
VFP_GEN_FIX(tosh)
VFP_GEN_FIX(tosl)
VFP_GEN_FIX(touh)
VFP_GEN_FIX(toul)
VFP_GEN_FIX(shto)
VFP_GEN_FIX(slto)
VFP_GEN_FIX(uhto)
VFP_GEN_FIX(ulto)
#undef VFP_GEN_FIX

static inline void gen_vfp_ld(DisasContext *s, int dp)
{
    if (dp)
        tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s));
    else
        tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s));
}

static inline void gen_vfp_st(DisasContext *s, int dp)
{
    if (dp)
        tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s));
    else
        tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s));
}

static inline long
vfp_reg_offset (int dp, int reg)
{
    if (dp)
        return offsetof(CPUARMState, vfp.regs[reg]);
    else if (reg & 1) {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.upper);
    } else {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.lower);
    }
}
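
/* Single-precision registers overlay the doubles: sN maps to the lower
 * (N even) or upper (N odd) 32-bit half of double register N/2. */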
1158

    
1159
/* Return the offset of a 32-bit piece of a NEON register.
1160
   zero is the least significant end of the register.  */
1161
static inline long
1162
neon_reg_offset (int reg, int n)
1163
{
1164
    int sreg;
1165
    sreg = reg * 2 + n;
1166
    return vfp_reg_offset(0, sreg);
1167
}
1168

    
1169
#define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n))
1170
#define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n))
1171

    
1172
static TCGv neon_load_reg(int reg, int pass)
1173
{
1174
    TCGv tmp = new_tmp();
1175
    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1176
    return tmp;
1177
}
1178

    
1179
static void neon_store_reg(int reg, int pass, TCGv var)
1180
{
1181
    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1182
    dead_tmp(var);
1183
}
1184

    
1185
#define tcg_gen_ld_f32 tcg_gen_ld_i32
1186
#define tcg_gen_ld_f64 tcg_gen_ld_i64
1187
#define tcg_gen_st_f32 tcg_gen_st_i32
1188
#define tcg_gen_st_f64 tcg_gen_st_i64
1189

    
1190
static inline void gen_mov_F0_vreg(int dp, int reg)
1191
{
1192
    if (dp)
1193
        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1194
    else
1195
        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1196
}
1197

    
1198
static inline void gen_mov_F1_vreg(int dp, int reg)
1199
{
1200
    if (dp)
1201
        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1202
    else
1203
        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1204
}
1205

    
1206
static inline void gen_mov_vreg_F0(int dp, int reg)
1207
{
1208
    if (dp)
1209
        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1210
    else
1211
        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1212
}

#define ARM_CP_RW_BIT        (1 << 20)

static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
{
    int rd;
    uint32_t offset;

    rd = (insn >> 16) & 0xf;
    gen_movl_T1_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            gen_op_addl_T1_im(offset);
        else
            gen_op_addl_T1_im(-offset);

        if (insn & (1 << 21))
            gen_movl_reg_T1(s, rd);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        if (insn & (1 << 23))
            gen_op_movl_T0_im(offset);
        else
            gen_op_movl_T0_im(-offset);
        gen_op_addl_T0_T1();
        gen_movl_reg_T0(s, rd);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
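
/* Bit 24 selects pre- vs post-indexed addressing, bit 23 the offset sign
 * and bit 21 base writeback; the 8-bit offset is scaled by 4 when bit 8
 * of the instruction is set (the "<< ((insn >> 7) & 2)" above). */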
1246

    
1247
static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
1248
{
1249
    int rd = (insn >> 0) & 0xf;
1250

    
1251
    if (insn & (1 << 8))
1252
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
1253
            return 1;
1254
        else
1255
            gen_op_iwmmxt_movl_T0_wCx(rd);
1256
    else
1257
        gen_op_iwmmxt_movl_T0_T1_wRn(rd);
1258

    
1259
    gen_op_movl_T1_im(mask);
1260
    gen_op_andl_T0_T1();
1261
    return 0;
1262
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv tmp;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                        /* TMRRC */
                gen_op_iwmmxt_movl_T0_T1_wRn(wrd);
                gen_movl_reg_T0(s, rdlo);
                gen_movl_reg_T1(s, rdhi);
            } else {                                        /* TMCRR */
                gen_movl_T0_reg(s, rdlo);
                gen_movl_T1_reg(s, rdhi);
                gen_op_iwmmxt_movl_wRn_T0_T1(wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        if (gen_iwmmxt_address(s, insn))
            return 1;
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                        /* WLDRW wCx */
                tmp = gen_ld32(cpu_T[1], IS_USER(s));
                tcg_gen_mov_i32(cpu_T[0], tmp);
                dead_tmp(tmp);
                gen_op_iwmmxt_movl_wCx_T0(wrd);
            } else {
                if (insn & (1 << 8))
                    if (insn & (1 << 22))                /* WLDRD */
                        gen_ldst(iwmmxt_ldq, s);
                    else                                /* WLDRW wRd */
                        gen_ldst(iwmmxt_ldl, s);
                else
                    if (insn & (1 << 22))                /* WLDRH */
                        gen_ldst(iwmmxt_ldw, s);
                    else                                /* WLDRB */
                        gen_ldst(iwmmxt_ldb, s);
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                        /* WSTRW wCx */
                gen_op_iwmmxt_movl_T0_wCx(wrd);
                tmp = new_tmp();
                tcg_gen_mov_i32(tmp, cpu_T[0]);
                gen_st32(tmp, cpu_T[1], IS_USER(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                if (insn & (1 << 8))
                    if (insn & (1 << 22))                /* WSTRD */
                        gen_ldst(iwmmxt_stq, s);
                    else                                /* WSTRW wRd */
                        gen_ldst(iwmmxt_stl, s);
                else
                    if (insn & (1 << 22))                /* WSTRH */
                        gen_ldst(iwmmxt_stw, s);
                    else                                /* WSTRB */
                        gen_ldst(iwmmxt_stb, s);
            }
        }
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
    case 0x000:                                                /* WOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_orq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x011:                                                /* TMCR */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        switch (wrd) {
        case ARM_IWMMXT_wCID:
        case ARM_IWMMXT_wCASF:
            break;
        case ARM_IWMMXT_wCon:
            gen_op_iwmmxt_set_cup();
            /* Fall through.  */
        case ARM_IWMMXT_wCSSF:
            gen_op_iwmmxt_movl_T0_wCx(wrd);
            gen_movl_T1_reg(s, rd);
            gen_op_bicl_T0_T1();
            gen_op_iwmmxt_movl_wCx_T0(wrd);
            break;
        case ARM_IWMMXT_wCGR0:
        case ARM_IWMMXT_wCGR1:
        case ARM_IWMMXT_wCGR2:
        case ARM_IWMMXT_wCGR3:
            gen_op_iwmmxt_set_cup();
            /* TMCR moves the core register into the control register.  */
            gen_movl_T0_reg(s, rd);
            gen_op_iwmmxt_movl_wCx_T0(wrd);
            break;
        default:
            return 1;
        }
        break;
    case 0x100:                                                /* WXOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x111:                                                /* TMRC */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movl_T0_wCx(wrd);
        gen_movl_reg_T0(s, rd);
        break;
    case 0x300:                                                /* WANDN */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_negq_M0();
        gen_op_iwmmxt_andq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x200:                                                /* WAND */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_andq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x810: case 0xa10:                                /* WMADD */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
        else
            gen_op_iwmmxt_madduq_M0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:        /* WUNPCKIL */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:        /* WUNPCKIH */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x012: case 0x112: case 0x412: case 0x512:        /* WSAD */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 22))
            gen_op_iwmmxt_sadw_M0_wRn(rd1);
        else
            gen_op_iwmmxt_sadb_M0_wRn(rd1);
        if (!(insn & (1 << 20)))
            gen_op_iwmmxt_addl_M0_wRn(wrd);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x010: case 0x110: case 0x210: case 0x310:        /* WMUL */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_mulsw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
        else
            gen_op_iwmmxt_muluw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x410: case 0x510: case 0x610: case 0x710:        /* WMAC */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_macsw_M0_wRn(rd1);
        else
            gen_op_iwmmxt_macuw_M0_wRn(rd1);
        if (!(insn & (1 << 20))) {
            if (insn & (1 << 21))
                gen_op_iwmmxt_addsq_M0_wRn(wrd);
            else
                gen_op_iwmmxt_adduq_M0_wRn(wrd);
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x006: case 0x406: case 0x806: case 0xc06:        /* WCMPEQ */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x800: case 0x900: case 0xc00: case 0xd00:        /* WAVG2 */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 22))
            gen_op_iwmmxt_avgw_M0_wRn(rd1, (insn >> 20) & 1);
        else
            gen_op_iwmmxt_avgb_M0_wRn(rd1, (insn >> 20) & 1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x802: case 0x902: case 0xa02: case 0xb02:        /* WALIGNR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
        gen_op_movl_T1_im(7);
        gen_op_andl_T0_T1();
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x601: case 0x605: case 0x609: case 0x60d:        /* TINSR */
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        gen_movl_T0_reg(s, rd);
        gen_op_iwmmxt_movq_M0_wRn(wrd);
        switch ((insn >> 6) & 3) {
        case 0:
            gen_op_movl_T1_im(0xff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
            break;
        case 1:
            gen_op_movl_T1_im(0xffff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
            break;
        case 2:
            gen_op_movl_T1_im(0xffffffff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
1593
    case 0x107: case 0x507: case 0x907: case 0xd07:        /* TEXTRM */
1594
        rd = (insn >> 12) & 0xf;
1595
        wrd = (insn >> 16) & 0xf;
1596
        if (rd == 15)
1597
            return 1;
1598
        gen_op_iwmmxt_movq_M0_wRn(wrd);
1599
        switch ((insn >> 22) & 3) {
1600
        case 0:
1601
            if (insn & 8)
1602
                gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
1603
            else {
1604
                gen_op_movl_T1_im(0xff);
1605
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 7) << 3);
1606
            }
1607
            break;
1608
        case 1:
1609
            if (insn & 8)
1610
                gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
1611
            else {
1612
                gen_op_movl_T1_im(0xffff);
1613
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 3) << 4);
1614
            }
1615
            break;
1616
        case 2:
1617
            gen_op_movl_T1_im(0xffffffff);
1618
            gen_op_iwmmxt_extru_T0_M0_T1((insn & 1) << 5);
1619
            break;
1620
        case 3:
1621
            return 1;
1622
        }
1623
        gen_movl_reg_T0(s, rd);
1624
        break;
1625
    case 0x117: case 0x517: case 0x917: case 0xd17:        /* TEXTRC */
1626
        if ((insn & 0x000ff008) != 0x0003f000)
1627
            return 1;
1628
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1629
        switch ((insn >> 22) & 3) {
1630
        case 0:
1631
            gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
1632
            break;
1633
        case 1:
1634
            gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
1635
            break;
1636
        case 2:
1637
            gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
1638
            break;
1639
        case 3:
1640
            return 1;
1641
        }
1642
        gen_op_shll_T1_im(28);
1643
        gen_set_nzcv(cpu_T[1]);
1644
        break;
1645
    case 0x401: case 0x405: case 0x409: case 0x40d:        /* TBCST */
1646
        rd = (insn >> 12) & 0xf;
1647
        wrd = (insn >> 16) & 0xf;
1648
        gen_movl_T0_reg(s, rd);
1649
        switch ((insn >> 6) & 3) {
1650
        case 0:
1651
            gen_op_iwmmxt_bcstb_M0_T0();
1652
            break;
1653
        case 1:
1654
            gen_op_iwmmxt_bcstw_M0_T0();
1655
            break;
1656
        case 2:
1657
            gen_op_iwmmxt_bcstl_M0_T0();
1658
            break;
1659
        case 3:
1660
            return 1;
1661
        }
1662
        gen_op_iwmmxt_movq_wRn_M0(wrd);
1663
        gen_op_iwmmxt_set_mup();
1664
        break;
1665
    case 0x113: case 0x513: case 0x913: case 0xd13:        /* TANDC */
        if ((insn & 0x000ff00f) != 0x0003f000)
            return 1;
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
        switch ((insn >> 22) & 3) {
        case 0:
            for (i = 0; i < 7; i ++) {
                gen_op_shll_T1_im(4);
                gen_op_andl_T0_T1();
            }
            break;
        case 1:
            for (i = 0; i < 3; i ++) {
                gen_op_shll_T1_im(8);
                gen_op_andl_T0_T1();
            }
            break;
        case 2:
            gen_op_shll_T1_im(16);
            gen_op_andl_T0_T1();
            break;
        case 3:
            return 1;
        }
        gen_set_nzcv(cpu_T[0]);
        break;
    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:        /* WACC */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_addcb_M0();
            break;
        case 1:
            gen_op_iwmmxt_addcw_M0();
            break;
        case 2:
            gen_op_iwmmxt_addcl_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x115: case 0x515: case 0x915: case 0xd15:        /* TORC */
        if ((insn & 0x000ff00f) != 0x0003f000)
            return 1;
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
        switch ((insn >> 22) & 3) {
        case 0:
            for (i = 0; i < 7; i ++) {
                gen_op_shll_T1_im(4);
                gen_op_orl_T0_T1();
            }
            break;
        case 1:
            for (i = 0; i < 3; i ++) {
                gen_op_shll_T1_im(8);
                gen_op_orl_T0_T1();
            }
            break;
        case 2:
            gen_op_shll_T1_im(16);
            gen_op_orl_T0_T1();
            break;
        case 3:
            return 1;
        }
        gen_set_nzcv(cpu_T[0]);
        break;
    case 0x103: case 0x503: case 0x903: case 0xd03:        /* TMOVMSK */
        rd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        if ((insn & 0xf) != 0)
            return 1;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_msbb_T0_M0();
            break;
        case 1:
            gen_op_iwmmxt_msbw_T0_M0();
            break;
        case 2:
            gen_op_iwmmxt_msbl_T0_M0();
            break;
        case 3:
            return 1;
        }
        gen_movl_reg_T0(s, rd);
        break;
    case 0x106: case 0x306: case 0x506: case 0x706:        /* WCMPGT */
    case 0x906: case 0xb06: case 0xd06: case 0xf06:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x00e: case 0x20e: case 0x40e: case 0x60e:        /* WUNPCKEL */
    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsb_M0();
            else
                gen_op_iwmmxt_unpacklub_M0();
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsw_M0();
            else
                gen_op_iwmmxt_unpackluw_M0();
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsl_M0();
            else
                gen_op_iwmmxt_unpacklul_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x00c: case 0x20c: case 0x40c: case 0x60c:        /* WUNPCKEH */
    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsb_M0();
            else
                gen_op_iwmmxt_unpackhub_M0();
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsw_M0();
            else
                gen_op_iwmmxt_unpackhuw_M0();
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsl_M0();
            else
                gen_op_iwmmxt_unpackhul_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x204: case 0x604: case 0xa04: case 0xe04:        /* WSRL */
    case 0x214: case 0x614: case 0xa14: case 0xe14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_srlw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_srll_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_srlq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x004: case 0x404: case 0x804: case 0xc04:        /* WSRA */
    case 0x014: case 0x414: case 0x814: case 0xc14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_sraw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_sral_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_sraq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x104: case 0x504: case 0x904: case 0xd04:        /* WSLL */
    case 0x114: case 0x514: case 0x914: case 0xd14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_sllw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_slll_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_sllq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x304: case 0x704: case 0xb04: case 0xf04:        /* WROR */
    case 0x314: case 0x714: case 0xb14: case 0xf14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            if (gen_iwmmxt_shift(insn, 0xf))
                return 1;
            gen_op_iwmmxt_rorw_M0_T0();
            break;
        case 2:
            if (gen_iwmmxt_shift(insn, 0x1f))
                return 1;
            gen_op_iwmmxt_rorl_M0_T0();
            break;
        case 3:
            if (gen_iwmmxt_shift(insn, 0x3f))
                return 1;
            gen_op_iwmmxt_rorq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x116: case 0x316: case 0x516: case 0x716:        /* WMIN */
    case 0x916: case 0xb16: case 0xd16: case 0xf16:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x016: case 0x216: case 0x416: case 0x616:        /* WMAX */
    case 0x816: case 0xa16: case 0xc16: case 0xe16:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x002: case 0x102: case 0x202: case 0x302:        /* WALIGNI */
    case 0x402: case 0x502: case 0x602: case 0x702:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_movl_T0_im((insn >> 20) & 3);
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x01a: case 0x11a: case 0x21a: case 0x31a:        /* WSUB */
    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 20) & 0xf) {
        case 0x0:
            gen_op_iwmmxt_subnb_M0_wRn(rd1);
            break;
        case 0x1:
            gen_op_iwmmxt_subub_M0_wRn(rd1);
            break;
        case 0x3:
            gen_op_iwmmxt_subsb_M0_wRn(rd1);
            break;
        case 0x4:
            gen_op_iwmmxt_subnw_M0_wRn(rd1);
            break;
        case 0x5:
            gen_op_iwmmxt_subuw_M0_wRn(rd1);
            break;
        case 0x7:
            gen_op_iwmmxt_subsw_M0_wRn(rd1);
            break;
        case 0x8:
            gen_op_iwmmxt_subnl_M0_wRn(rd1);
            break;
        case 0x9:
            gen_op_iwmmxt_subul_M0_wRn(rd1);
            break;
        case 0xb:
            gen_op_iwmmxt_subsl_M0_wRn(rd1);
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
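    /* WSHUFH: insn bits [23:20] and [3:0] form an 8-bit control word,
       two bits per destination halfword selecting a source halfword.  */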
    case 0x01e: case 0x11e: case 0x21e: case 0x31e:        /* WSHUFH */
    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
        gen_op_iwmmxt_shufh_M0_T0();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x018: case 0x118: case 0x218: case 0x318:        /* WADD */
    case 0x418: case 0x518: case 0x618: case 0x718:
    case 0x818: case 0x918: case 0xa18: case 0xb18:
    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 20) & 0xf) {
        case 0x0:
            gen_op_iwmmxt_addnb_M0_wRn(rd1);
            break;
        case 0x1:
            gen_op_iwmmxt_addub_M0_wRn(rd1);
            break;
        case 0x3:
            gen_op_iwmmxt_addsb_M0_wRn(rd1);
            break;
        case 0x4:
            gen_op_iwmmxt_addnw_M0_wRn(rd1);
            break;
        case 0x5:
            gen_op_iwmmxt_adduw_M0_wRn(rd1);
            break;
        case 0x7:
            gen_op_iwmmxt_addsw_M0_wRn(rd1);
            break;
        case 0x8:
            gen_op_iwmmxt_addnl_M0_wRn(rd1);
            break;
        case 0x9:
            gen_op_iwmmxt_addul_M0_wRn(rd1);
            break;
        case 0xb:
            gen_op_iwmmxt_addsl_M0_wRn(rd1);
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x008: case 0x108: case 0x208: case 0x308:        /* WPACK */
    case 0x408: case 0x508: case 0x608: case 0x708:
    case 0x808: case 0x908: case 0xa08: case 0xb08:
    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (!(insn & (1 << 20)))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packul_M0_wRn(rd1);
            break;
        case 3:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsq_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packuq_M0_wRn(rd1);
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x201: case 0x203: case 0x205: case 0x207:
    case 0x209: case 0x20b: case 0x20d: case 0x20f:
    case 0x211: case 0x213: case 0x215: case 0x217:
    case 0x219: case 0x21b: case 0x21d: case 0x21f:
        wrd = (insn >> 5) & 0xf;
        rd0 = (insn >> 12) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        if (rd0 == 0xf || rd1 == 0xf)
            return 1;
        gen_op_iwmmxt_movq_M0_wRn(wrd);
        switch ((insn >> 16) & 0xf) {
        case 0x0:                                        /* TMIA */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
            break;
        case 0x8:                                        /* TMIAPH */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
            break;
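        /* TMIAxy: insn bits 16 and 17 pick the bottom or top halfword
           of each source register before the multiply-accumulate.  */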
        case 0xc: case 0xd: case 0xe: case 0xf:                /* TMIAxy */
            gen_movl_T1_reg(s, rd0);
            if (insn & (1 << 16))
                gen_op_shrl_T1_im(16);
            gen_op_movl_T0_T1();
            gen_movl_T1_reg(s, rd1);
            if (insn & (1 << 17))
                gen_op_shrl_T1_im(16);
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    default:
        return 1;
    }

    return 0;
}

/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    int acc, rd0, rd1, rdhi, rdlo;

    if ((insn & 0x0ff00f10) == 0x0e200010) {
        /* Multiply with Internal Accumulate Format */
        rd0 = (insn >> 12) & 0xf;
        rd1 = insn & 0xf;
        acc = (insn >> 5) & 7;

        if (acc != 0)
            return 1;

        switch ((insn >> 16) & 0xf) {
        case 0x0:                                        /* MIA */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
            break;
        case 0x8:                                        /* MIAPH */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
            break;
        case 0xc:                                        /* MIABB */
        case 0xd:                                        /* MIABT */
        case 0xe:                                        /* MIATB */
        case 0xf:                                        /* MIATT */
            gen_movl_T1_reg(s, rd0);
            if (insn & (1 << 16))
                gen_op_shrl_T1_im(16);
            gen_op_movl_T0_T1();
            gen_movl_T1_reg(s, rd1);
            if (insn & (1 << 17))
                gen_op_shrl_T1_im(16);
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
            break;
        default:
            return 1;
        }

        gen_op_iwmmxt_movq_wRn_M0(acc);
        return 0;
    }

    if ((insn & 0x0fe00ff8) == 0x0c400000) {
        /* Internal Accumulator Access Format */
        rdhi = (insn >> 16) & 0xf;
        rdlo = (insn >> 12) & 0xf;
        acc = insn & 7;

        if (acc != 0)
            return 1;

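        /* The XScale internal accumulator is 40 bits wide: MRA returns
           bits [31:0] in RdLo and bits [39:32] in RdHi, hence the
           (1 << (40 - 32)) - 1 mask below.  */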
        if (insn & ARM_CP_RW_BIT) {                        /* MRA */
            gen_op_iwmmxt_movl_T0_T1_wRn(acc);
            gen_movl_reg_T0(s, rdlo);
            gen_op_movl_T0_im((1 << (40 - 32)) - 1);
            gen_op_andl_T0_T1();
            gen_movl_reg_T0(s, rdhi);
        } else {                                        /* MAR */
            gen_movl_T0_reg(s, rdlo);
            gen_movl_T1_reg(s, rdhi);
            gen_op_iwmmxt_movl_wRn_T0_T1(acc);
        }
        return 0;
    }

    return 1;
}

/* Disassemble system coprocessor instruction.  Return nonzero if
   instruction is not defined.  */
static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    TCGv tmp;
    uint32_t rd = (insn >> 12) & 0xf;
    uint32_t cp = (insn >> 8) & 0xf;
    if (IS_USER(s)) {
        return 1;
    }

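    /* The PC is synchronized before calling the helper so that a
       coprocessor access fault is raised with the correct return
       address.  */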
    if (insn & ARM_CP_RW_BIT) {
        if (!env->cp[cp].cp_read)
            return 1;
        gen_set_pc_im(s->pc);
        tmp = new_tmp();
        gen_helper_get_cp(tmp, cpu_env, tcg_const_i32(insn));
        store_reg(s, rd, tmp);
    } else {
        if (!env->cp[cp].cp_write)
            return 1;
        gen_set_pc_im(s->pc);
        tmp = load_reg(s, rd);
        gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp);
    }
    return 0;
}

static int cp15_user_ok(uint32_t insn)
{
    int cpn = (insn >> 16) & 0xf;
    int cpm = insn & 0xf;
    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);

    if (cpn == 13 && cpm == 0) {
        /* TLS register.  */
        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
            return 1;
    }
    if (cpn == 7) {
        /* ISB, DSB, DMB.  */
        if ((cpm == 5 && op == 4)
                || (cpm == 10 && (op == 4 || op == 5)))
            return 1;
    }
    return 0;
}

/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
   instruction is not defined.  */
static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    uint32_t rd;
    TCGv tmp;

    /* M profile cores use memory mapped registers instead of cp15.  */
    if (arm_feature(env, ARM_FEATURE_M))
        return 1;

    if ((insn & (1 << 25)) == 0) {
        if (insn & (1 << 20)) {
            /* mrrc */
            return 1;
        }
        /* mcrr.  Used for block cache operations, so implement as no-op.  */
        return 0;
    }
    if ((insn & (1 << 4)) == 0) {
        /* cdp */
        return 1;
    }
    if (IS_USER(s) && !cp15_user_ok(insn)) {
        return 1;
    }
    if ((insn & 0x0fff0fff) == 0x0e070f90
        || (insn & 0x0fff0fff) == 0x0e070f58) {
        /* Wait for interrupt.  */
        gen_set_pc_im(s->pc);
        s->is_jmp = DISAS_WFI;
        return 0;
    }
    rd = (insn >> 12) & 0xf;
    if (insn & ARM_CP_RW_BIT) {
        tmp = new_tmp();
        gen_helper_get_cp15(tmp, cpu_env, tcg_const_i32(insn));
        /* If the destination register is r15 then the condition codes
           are set and the result is discarded.  */
        if (rd != 15)
            store_reg(s, rd, tmp);
        else
            dead_tmp(tmp);
    } else {
        tmp = load_reg(s, rd);
        gen_helper_set_cp15(cpu_env, tcg_const_i32(insn), tmp);
        dead_tmp(tmp);
        /* Normally we would always end the TB here, but Linux
         * arch/arm/mach-pxa/sleep.S expects two instructions following
         * an MMU enable to execute from cache.  Imitate this behaviour.  */
        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
                (insn & 0x0fff0fff) != 0x0e010f10)
            gen_lookup_tb(s);
    }
    return 0;
}

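/* VFP single registers are numbered with the low bit held in a
   separate field, doubles with the high bit separate (the D/N/M
   bits).  E.g. VFP_SREG_D(insn) builds the destination number from
   insn[15:12] and insn[22].  Pre-VFP3 cores only have 16 double
   registers, so the extra bit must be zero there.  */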
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if (insn & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)

/* Move between integer and VFP cores.  */
static TCGv gen_vfp_mrs(void)
{
    TCGv tmp = new_tmp();
    tcg_gen_mov_i32(tmp, cpu_F0s);
    return tmp;
}

static void gen_vfp_msr(TCGv tmp)
{
    tcg_gen_mov_i32(cpu_F0s, tmp);
    dead_tmp(tmp);
}

static inline int
vfp_enabled(CPUState * env)
{
    return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
}

/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
    int dp, veclen;
    TCGv tmp;

    if (!arm_feature(env, ARM_FEATURE_VFP))
        return 1;

    if (!vfp_enabled(env)) {
        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
        if ((insn & 0x0fe00fff) != 0x0ee00a10)
            return 1;
        rn = (insn >> 16) & 0xf;
        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
            return 1;
    }
    dp = ((insn & 0xf00) == 0xb00);
    switch ((insn >> 24) & 0xf) {
    case 0xe:
        if (insn & (1 << 4)) {
            /* single register transfer */
            rd = (insn >> 12) & 0xf;
            if (dp) {
                int size;
                int pass;

                VFP_DREG_N(rn, insn);
                if (insn & 0xf)
                    return 1;
                if (insn & 0x00c00060
                    && !arm_feature(env, ARM_FEATURE_NEON))
                    return 1;

                pass = (insn >> 21) & 1;
                if (insn & (1 << 22)) {
                    size = 0;
                    offset = ((insn >> 5) & 3) * 8;
                } else if (insn & (1 << 5)) {
                    size = 1;
                    offset = (insn & (1 << 6)) ? 16 : 0;
                } else {
                    size = 2;
                    offset = 0;
                }
                if (insn & ARM_CP_RW_BIT) {
                    /* vfp->arm */
                    switch (size) {
                    case 0:
                        NEON_GET_REG(T1, rn, pass);
                        if (offset)
                            gen_op_shrl_T1_im(offset);
                        if (insn & (1 << 23))
                            gen_uxtb(cpu_T[1]);
                        else
                            gen_sxtb(cpu_T[1]);
                        break;
                    case 1:
                        NEON_GET_REG(T1, rn, pass);
                        if (insn & (1 << 23)) {
                            if (offset) {
                                gen_op_shrl_T1_im(16);
                            } else {
                                gen_uxth(cpu_T[1]);
                            }
                        } else {
                            if (offset) {
                                gen_op_sarl_T1_im(16);
                            } else {
                                gen_sxth(cpu_T[1]);
                            }
                        }
                        break;
                    case 2:
                        NEON_GET_REG(T1, rn, pass);
                        break;
                    }
                    gen_movl_reg_T1(s, rd);
                } else {
                    /* arm->vfp */
                    gen_movl_T0_reg(s, rd);
                    if (insn & (1 << 23)) {
                        /* VDUP */
                        if (size == 0) {
                            gen_op_neon_dup_u8(0);
                        } else if (size == 1) {
                            gen_op_neon_dup_low16();
                        }
                        NEON_SET_REG(T0, rn, 0);
                        NEON_SET_REG(T0, rn, 1);
                    } else {
                        /* VMOV */
                        switch (size) {
                        case 0:
                            tmp = neon_load_reg(rn, pass);
                            gen_bfi(tmp, tmp, cpu_T[0], offset, 0xff);
                            neon_store_reg(rn, pass, tmp);
                            break;
                        case 1:
                            tmp = neon_load_reg(rn, pass);
                            gen_bfi(tmp, tmp, cpu_T[0], offset, 0xffff);
                            neon_store_reg(rn, pass, tmp);
                            break;
                        case 2:
                            NEON_SET_REG(T0, rn, pass);
                            break;
                        }
                    }
                }
            } else { /* !dp */
                if ((insn & 0x6f) != 0x00)
                    return 1;
                rn = VFP_SREG_N(insn);
                if (insn & ARM_CP_RW_BIT) {
                    /* vfp->arm */
                    if (insn & (1 << 21)) {
                        /* system register */
                        rn >>= 1;

                        switch (rn) {
                        case ARM_VFP_FPSID:
                            /* VFP2 allows access to FPSID from userspace.
                               VFP3 restricts all id registers to privileged
                               accesses.  */
                            if (IS_USER(s)
                                && arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPEXC:
                            if (IS_USER(s))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPINST:
                        case ARM_VFP_FPINST2:
                            /* Not present in VFP3.  */
                            if (IS_USER(s)
                                || arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPSCR:
                            if (rd == 15) {
                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
                            } else {
                                tmp = new_tmp();
                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
                            }
                            break;
                        case ARM_VFP_MVFR0:
                        case ARM_VFP_MVFR1:
                            if (IS_USER(s)
                                || !arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        default:
                            return 1;
                        }
                    } else {
                        gen_mov_F0_vreg(0, rn);
                        tmp = gen_vfp_mrs();
                    }
                    if (rd == 15) {
                        /* Set the 4 flag bits in the CPSR.  */
                        gen_set_nzcv(tmp);
                        dead_tmp(tmp);
                    } else {
                        store_reg(s, rd, tmp);
                    }
                } else {
                    /* arm->vfp */
                    tmp = load_reg(s, rd);
                    if (insn & (1 << 21)) {
                        rn >>= 1;
                        /* system register */
                        switch (rn) {
                        case ARM_VFP_FPSID:
                        case ARM_VFP_MVFR0:
                        case ARM_VFP_MVFR1:
                            /* Writes are ignored.  */
                            break;
                        case ARM_VFP_FPSCR:
                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
                            dead_tmp(tmp);
                            gen_lookup_tb(s);
                            break;
                        case ARM_VFP_FPEXC:
                            if (IS_USER(s))
                                return 1;
                            store_cpu_field(tmp, vfp.xregs[rn]);
                            gen_lookup_tb(s);
                            break;
                        case ARM_VFP_FPINST:
                        case ARM_VFP_FPINST2:
                            store_cpu_field(tmp, vfp.xregs[rn]);
                            break;
                        default:
                            return 1;
                        }
                    } else {
                        gen_vfp_msr(tmp);
                        gen_mov_vreg_F0(0, rn);
                    }
                }
            }
        } else {
            /* data processing */
            /* The opcode is in bits 23, 21, 20 and 6.  */
            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
            if (dp) {
                if (op == 15) {
                    /* rn is opcode */
                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
                } else {
                    /* rn is register number */
                    VFP_DREG_N(rn, insn);
                }

                if (op == 15 && (rn == 15 || rn > 17)) {
                    /* Integer or single precision destination.  */
                    rd = VFP_SREG_D(insn);
                } else {
                    VFP_DREG_D(rd, insn);
                }

                if (op == 15 && (rn == 16 || rn == 17)) {
                    /* Integer source.  */
                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
                } else {
                    VFP_DREG_M(rm, insn);
                }
            } else {
                rn = VFP_SREG_N(insn);
                if (op == 15 && rn == 15) {
                    /* Double precision destination.  */
                    VFP_DREG_D(rd, insn);
                } else {
                    rd = VFP_SREG_D(insn);
                }
                rm = VFP_SREG_M(insn);
            }

            veclen = env->vfp.vec_len;
            if (op == 15 && rn > 3)
                veclen = 0;

            /* Shut up compiler warnings.  */
            delta_m = 0;
            delta_d = 0;
            bank_mask = 0;

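            /* VFP short vectors: the register file is split into banks
               (4 doubles or 8 singles each).  An operation is scalar if
               the destination sits in bank 0, and mixed scalar/vector
               if only the rm operand does; otherwise each iteration
               steps the register numbers by the configured stride,
               wrapping within the bank.  */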
            if (veclen > 0) {
                if (dp)
                    bank_mask = 0xc;
                else
                    bank_mask = 0x18;

                /* Figure out what type of vector operation this is.  */
                if ((rd & bank_mask) == 0) {
                    /* scalar */
                    veclen = 0;
                } else {
                    if (dp)
                        delta_d = (env->vfp.vec_stride >> 1) + 1;
                    else
                        delta_d = env->vfp.vec_stride + 1;

                    if ((rm & bank_mask) == 0) {
                        /* mixed scalar/vector */
                        delta_m = 0;
                    } else {
                        /* vector */
                        delta_m = delta_d;
                    }
                }
            }

            /* Load the initial operands.  */
            if (op == 15) {
                switch (rn) {
                case 16:
                case 17:
                    /* Integer source */
                    gen_mov_F0_vreg(0, rm);
                    break;
                case 8:
                case 9:
                    /* Compare */
                    gen_mov_F0_vreg(dp, rd);
                    gen_mov_F1_vreg(dp, rm);
                    break;
                case 10:
                case 11:
                    /* Compare with zero */
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_F1_ld0(dp);
                    break;
                case 20:
                case 21:
                case 22:
                case 23:
                    /* Source and destination the same.  */
                    gen_mov_F0_vreg(dp, rd);
                    break;
                default:
                    /* One source operand.  */
                    gen_mov_F0_vreg(dp, rm);
                    break;
                }
            } else {
                /* Two source operands.  */
                gen_mov_F0_vreg(dp, rn);
                gen_mov_F1_vreg(dp, rm);
            }

            for (;;) {
                /* Perform the calculation.  */
                switch (op) {
                case 0: /* mac: fd + (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 1: /* nmac: fd - (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_neg(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 2: /* msc: -fd + (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_sub(dp);
                    break;
                case 3: /* nmsc: -fd - (fn * fm)  */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    gen_vfp_neg(dp);
                    break;
                case 4: /* mul: fn * fm */
                    gen_vfp_mul(dp);
                    break;
                case 5: /* nmul: -(fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_neg(dp);
                    break;
                case 6: /* add: fn + fm */
                    gen_vfp_add(dp);
                    break;
                case 7: /* sub: fn - fm */
                    gen_vfp_sub(dp);
                    break;
                case 8: /* div: fn / fm */
                    gen_vfp_div(dp);
                    break;
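                /* VFP3 FCONST: the 8-bit immediate (sign, 3 exponent
                   bits, 4 mantissa bits) is scattered over insn[19:16]
                   and insn[3:0]; the code below expands it into a full
                   single or double precision constant.  */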
                case 14: /* fconst */
                    if (!arm_feature(env, ARM_FEATURE_VFP3))
                      return 1;

                    n = (insn << 12) & 0x80000000;
                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
                    if (dp) {
                        if (i & 0x40)
                            i |= 0x3f80;
                        else
                            i |= 0x4000;
                        n |= i << 16;
                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
                    } else {
                        if (i & 0x40)
                            i |= 0x780;
                        else
                            i |= 0x800;
                        n |= i << 19;
                        tcg_gen_movi_i32(cpu_F0s, n);
                    }
                    break;
                case 15: /* extension space */
                    switch (rn) {
                    case 0: /* cpy */
                        /* no-op */
                        break;
                    case 1: /* abs */
                        gen_vfp_abs(dp);
                        break;
                    case 2: /* neg */
                        gen_vfp_neg(dp);
                        break;
                    case 3: /* sqrt */
                        gen_vfp_sqrt(dp);
                        break;
                    case 8: /* cmp */
                        gen_vfp_cmp(dp);
                        break;
                    case 9: /* cmpe */
                        gen_vfp_cmpe(dp);
                        break;
                    case 10: /* cmpz */
                        gen_vfp_cmp(dp);
                        break;
                    case 11: /* cmpez */
                        gen_vfp_F1_ld0(dp);
                        gen_vfp_cmpe(dp);
                        break;
                    case 15: /* single<->double conversion */
                        if (dp)
                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
                        else
                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
                        break;
                    case 16: /* fuito */
                        gen_vfp_uito(dp);
                        break;
                    case 17: /* fsito */
                        gen_vfp_sito(dp);
                        break;
                    case 20: /* fshto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_shto(dp, rm);
                        break;
                    case 21: /* fslto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_slto(dp, rm);
                        break;
                    case 22: /* fuhto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_uhto(dp, rm);
                        break;
                    case 23: /* fulto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_ulto(dp, rm);
                        break;
                    case 24: /* ftoui */
                        gen_vfp_toui(dp);
                        break;
                    case 25: /* ftouiz */
                        gen_vfp_touiz(dp);
                        break;
                    case 26: /* ftosi */
                        gen_vfp_tosi(dp);
                        break;
                    case 27: /* ftosiz */
                        gen_vfp_tosiz(dp);
                        break;
                    case 28: /* ftosh */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_tosh(dp, rm);
                        break;
                    case 29: /* ftosl */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_tosl(dp, rm);
                        break;
                    case 30: /* ftouh */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_touh(dp, rm);
                        break;
                    case 31: /* ftoul */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_toul(dp, rm);
                        break;
                    default: /* undefined */
                        printf ("rn:%d\n", rn);
                        return 1;
                    }
                    break;
                default: /* undefined */
                    printf ("op:%d\n", op);
                    return 1;
                }

                /* Write back the result.  */
                if (op == 15 && (rn >= 8 && rn <= 11))
                    ; /* Comparison, do nothing.  */
                else if (op == 15 && rn > 17)
                    /* Integer result.  */
                    gen_mov_vreg_F0(0, rd);
                else if (op == 15 && rn == 15)
                    /* conversion */
                    gen_mov_vreg_F0(!dp, rd);
                else
                    gen_mov_vreg_F0(dp, rd);

                /* Break out of the loop if we have finished.  */
                if (veclen == 0)
                    break;

                if (op == 15 && delta_m == 0) {
                    /* single source one-many */
                    while (veclen--) {
                        rd = ((rd + delta_d) & (bank_mask - 1))
                             | (rd & bank_mask);
                        gen_mov_vreg_F0(dp, rd);
                    }
                    break;
                }
                /* Set up the next operands.  */
                veclen--;
                rd = ((rd + delta_d) & (bank_mask - 1))
                     | (rd & bank_mask);

                if (op == 15) {
                    /* One source operand.  */
                    rm = ((rm + delta_m) & (bank_mask - 1))
                         | (rm & bank_mask);
                    gen_mov_F0_vreg(dp, rm);
                } else {
                    /* Two source operands.  */
                    rn = ((rn + delta_d) & (bank_mask - 1))
                         | (rn & bank_mask);
                    gen_mov_F0_vreg(dp, rn);
                    if (delta_m) {
                        rm = ((rm + delta_m) & (bank_mask - 1))
                             | (rm & bank_mask);
                        gen_mov_F1_vreg(dp, rm);
                    }
                }
            }
        }
        break;
    case 0xc:
    case 0xd:
        if ((insn & 0x03e00000) == 0x00400000) {
            /* two-register transfer */
            rn = (insn >> 16) & 0xf;
            rd = (insn >> 12) & 0xf;
            if (dp) {
                VFP_DREG_M(rm, insn);
            } else {
                rm = VFP_SREG_M(insn);
            }

            if (insn & ARM_CP_RW_BIT) {
                /* vfp->arm */
                if (dp) {
                    gen_mov_F0_vreg(0, rm * 2);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rd, tmp);
                    gen_mov_F0_vreg(0, rm * 2 + 1);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rn, tmp);
                } else {
                    gen_mov_F0_vreg(0, rm);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rn, tmp);
                    gen_mov_F0_vreg(0, rm + 1);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rd, tmp);
                }
            } else {
                /* arm->vfp */
                if (dp) {
                    tmp = load_reg(s, rd);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm * 2);
                    tmp = load_reg(s, rn);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm * 2 + 1);
                } else {
                    tmp = load_reg(s, rn);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm);
                    tmp = load_reg(s, rd);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm + 1);
                }
            }
        } else {
            /* Load/store */
            rn = (insn >> 16) & 0xf;
            if (dp)
                VFP_DREG_D(rd, insn);
            else
                rd = VFP_SREG_D(insn);
            if (s->thumb && rn == 15) {
                gen_op_movl_T1_im(s->pc & ~2);
            } else {
                gen_movl_T1_reg(s, rn);
            }
            if ((insn & 0x01200000) == 0x01000000) {
                /* Single load/store */
                offset = (insn & 0xff) << 2;
                if ((insn & (1 << 23)) == 0)
                    offset = -offset;
                gen_op_addl_T1_im(offset);
                if (insn & (1 << 20)) {
                    gen_vfp_ld(s, dp);
                    gen_mov_vreg_F0(dp, rd);
                } else {
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_st(s, dp);
                }
            } else {
                /* load/store multiple */
                if (dp)
                    n = (insn >> 1) & 0x7f;
                else
                    n = insn & 0xff;

                if (insn & (1 << 24)) /* pre-decrement */
                    gen_op_addl_T1_im(-((insn & 0xff) << 2));

                if (dp)
                    offset = 8;
                else
                    offset = 4;
                for (i = 0; i < n; i++) {
                    if (insn & ARM_CP_RW_BIT) {
                        /* load */
                        gen_vfp_ld(s, dp);
                        gen_mov_vreg_F0(dp, rd + i);
                    } else {
                        /* store */
                        gen_mov_F0_vreg(dp, rd + i);
                        gen_vfp_st(s, dp);
                    }
                    gen_op_addl_T1_im(offset);
                }
                if (insn & (1 << 21)) {
                    /* writeback */
                    if (insn & (1 << 24))
                        offset = -offset * n;
                    else if (dp && (insn & 1))
                        offset = 4;
                    else
                        offset = 0;

                    if (offset != 0)
                        gen_op_addl_T1_im(offset);
                    gen_movl_reg_T1(s, rn);
                }
            }
        }
        break;
    default:
        /* Should never happen.  */
        return 1;
    }
    return 0;
}

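/* Jump to DEST, chaining this TB directly to the target when the
   destination lies on the same guest page; direct chaining is only
   safe there because TB invalidation works at page granularity.  */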
static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
        tcg_gen_goto_tb(n);
        gen_set_pc_im(dest);
        tcg_gen_exit_tb((long)tb + n);
    } else {
        gen_set_pc_im(dest);
        tcg_gen_exit_tb(0);
    }
}

static inline void gen_jmp (DisasContext *s, uint32_t dest)
{
    if (__builtin_expect(s->singlestep_enabled, 0)) {
        /* An indirect jump so that we still trigger the debug exception.  */
        if (s->thumb)
            dest |= 1;
        gen_bx_im(s, dest);
    } else {
        gen_goto_tb(s, 0, dest);
        s->is_jmp = DISAS_TB_JUMP;
    }
}

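/* Signed 16x16->32 multiply for the SMULxy family: X and Y select the
   top (shifted down arithmetically) or bottom (sign-extended) halfword
   of each operand.  */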
static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
{
    if (x)
        tcg_gen_sari_i32(t0, t0, 16);
    else
        gen_sxth(t0);
    if (y)
        tcg_gen_sari_i32(t1, t1, 16);
    else
        gen_sxth(t1);
    tcg_gen_mul_i32(t0, t0, t1);
}

/* Return the mask of PSR bits set by a MSR instruction.  */
static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
    uint32_t mask;

    mask = 0;
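    /* Each bit of the MSR field mask enables one byte of the PSR:
       bit 0 = control, bit 1 = extension, bit 2 = status, bit 3 = flags.  */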
    if (flags & (1 << 0))
3123
        mask |= 0xff;
3124
    if (flags & (1 << 1))
3125
        mask |= 0xff00;
3126
    if (flags & (1 << 2))
3127
        mask |= 0xff0000;
3128
    if (flags & (1 << 3))
3129
        mask |= 0xff000000;
3130

    
3131
    /* Mask out undefined bits.  */
3132
    mask &= ~CPSR_RESERVED;
3133
    if (!arm_feature(env, ARM_FEATURE_V6))
3134
        mask &= ~(CPSR_E | CPSR_GE);
3135
    if (!arm_feature(env, ARM_FEATURE_THUMB2))
3136
        mask &= ~CPSR_IT;
3137
    /* Mask out execution state bits.  */
3138
    if (!spsr)
3139
        mask &= ~CPSR_EXEC;
3140
    /* Mask out privileged bits.  */
3141
    if (IS_USER(s))
3142
        mask &= CPSR_USER;
3143
    return mask;
3144
}
3145

    
3146
/* Returns nonzero if access to the PSR is not permitted.  */
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
{
    TCGv tmp;
    if (spsr) {
        /* ??? This is also undefined in system mode.  */
        if (IS_USER(s))
            return 1;

        tmp = load_cpu_field(spsr);
        tcg_gen_andi_i32(tmp, tmp, ~mask);
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
        tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
        store_cpu_field(tmp, spsr);
    } else {
        gen_set_cpsr(cpu_T[0], mask);
    }
    gen_lookup_tb(s);
    return 0;
}

/* Generate an old-style exception return.  */
static void gen_exception_return(DisasContext *s)
{
    TCGv tmp;
    gen_set_pc_T0();
    tmp = load_cpu_field(spsr);
    gen_set_cpsr(tmp, 0xffffffff);
    dead_tmp(tmp);
    s->is_jmp = DISAS_UPDATE;
}

/* Generate a v6 exception return.  Marks both values as dead.  */
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
{
    gen_set_cpsr(cpsr, 0xffffffff);
    dead_tmp(cpsr);
    store_reg(s, 15, pc);
    s->is_jmp = DISAS_UPDATE;
}

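/* Sync the Thumb-2 IT block state back to the CPU state (condition in
   bits [7:4], continuation mask in bits [3:0]) so that an exception
   raised by the current instruction observes consistent condexec bits.  */
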
static inline void
gen_set_condexec (DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv tmp = new_tmp();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void gen_nop_hint(DisasContext *s, int val)
{
    switch (val) {
    case 3: /* wfi */
        gen_set_pc_im(s->pc);
        s->is_jmp = DISAS_WFI;
        break;
    case 2: /* wfe */
    case 4: /* sev */
        /* TODO: Implement SEV and WFE.  May help SMP performance.  */
    default: /* nop */
        break;
    }
}

/* Neon shift by constant.  The actual ops are the same as used for variable
   shifts.  [OP][U][SIZE]  */
static GenOpFunc *gen_neon_shift_im[8][2][4] = {
    { /* 0 */ /* VSHR */
      {
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s8,
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, { /* 1 */ /* VSRA */
      {
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s8,
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, { /* 2 */ /* VRSHR */
      {
        gen_op_neon_rshl_u8,
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s8,
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }, { /* 3 */ /* VRSRA */
      {
        gen_op_neon_rshl_u8,
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s8,
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }, { /* 4 */
      {
        NULL, NULL, NULL, NULL
      }, { /* VSRI */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }
    }, { /* 5 */
      { /* VSHL */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }, { /* VSLI */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }
    }, { /* 6 */ /* VQSHL */
      {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }, {
        gen_op_neon_qshl_s8,
        gen_op_neon_qshl_s16,
        gen_op_neon_qshl_s32,
        gen_op_neon_qshl_s64
      }
    }, { /* 7 */ /* VQSHLU */
      {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }, {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }
    }
};

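/* Right shifts by immediate reuse these left-shift helpers: the decode
   below computes a negative per-element shift count ("Right shifts are
   encoded as N - shift"), which the variable-shift ops interpret as a
   shift in the opposite direction.  */
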
/* [R][U][size - 1] */
static GenOpFunc *gen_neon_shift_im_narrow[2][2][3] = {
    {
      {
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, {
      {
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }
};

static inline void
gen_op_neon_narrow_u32 ()
{
    /* No-op.  */
}

static GenOpFunc *gen_neon_narrow[3] = {
    gen_op_neon_narrow_u8,
    gen_op_neon_narrow_u16,
    gen_op_neon_narrow_u32
};

static GenOpFunc *gen_neon_narrow_satu[3] = {
    gen_op_neon_narrow_sat_u8,
    gen_op_neon_narrow_sat_u16,
    gen_op_neon_narrow_sat_u32
};

static GenOpFunc *gen_neon_narrow_sats[3] = {
    gen_op_neon_narrow_sat_s8,
    gen_op_neon_narrow_sat_s16,
    gen_op_neon_narrow_sat_s32
};

static inline int gen_neon_add(int size)
{
    switch (size) {
    case 0: gen_op_neon_add_u8(); break;
    case 1: gen_op_neon_add_u16(); break;
    case 2: gen_op_addl_T0_T1(); break;
    default: return 1;
    }
    return 0;
}

/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_op_neon_pmax_s32  gen_op_neon_max_s32
#define gen_op_neon_pmax_u32  gen_op_neon_max_u32
#define gen_op_neon_pmin_s32  gen_op_neon_min_s32
#define gen_op_neon_pmin_u32  gen_op_neon_min_u32

#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: gen_op_neon_##name##_s8(); break; \
    case 1: gen_op_neon_##name##_u8(); break; \
    case 2: gen_op_neon_##name##_s16(); break; \
    case 3: gen_op_neon_##name##_u16(); break; \
    case 4: gen_op_neon_##name##_s32(); break; \
    case 5: gen_op_neon_##name##_u32(); break; \
    default: return 1; \
    }} while (0)

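/* Example: with size == 1 and u == 0, GEN_NEON_INTEGER_OP(hadd) expands
   to gen_op_neon_hadd_s16(); the index (size << 1) | u selects the
   signed/unsigned helper for the element width.  */
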
static inline void
gen_neon_movl_scratch_T0(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_setreg_T0(offset);
}

static inline void
gen_neon_movl_scratch_T1(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_setreg_T1(offset);
}

static inline void
gen_neon_movl_T0_scratch(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_getreg_T0(offset);
}

static inline void
gen_neon_movl_T1_scratch(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_getreg_T1(offset);
}

static inline void gen_op_neon_widen_u32(void)
{
    gen_op_movl_T1_im(0);
}

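/* Load a scalar operand into T0.  A 16-bit scalar is duplicated into
   both halves of T0 so the 2x16-bit elementwise helpers can be reused
   unchanged for the by-scalar forms.  */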
static inline void gen_neon_get_scalar(int size, int reg)
{
    if (size == 1) {
        NEON_GET_REG(T0, reg >> 1, reg & 1);
    } else {
        NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
        if (reg & 1)
            gen_op_neon_dup_low16();
        else
            gen_op_neon_dup_high16();
    }
}

static void gen_neon_unzip(int reg, int q, int tmp, int size)
{
    int n;

    for (n = 0; n < q + 1; n += 2) {
        NEON_GET_REG(T0, reg, n);
        NEON_GET_REG(T1, reg, n + 1);
        switch (size) {
        case 0: gen_op_neon_unzip_u8(); break;
        case 1: gen_op_neon_zip_u16(); break; /* zip and unzip are the same.  */
        case 2: /* no-op */; break;
        default: abort();
        }
        gen_neon_movl_scratch_T0(tmp + n);
        gen_neon_movl_scratch_T1(tmp + n + 1);
    }
}

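/* Per-"type" parameters for VLDn/VSTn: nregs is the number of registers
   transferred, interleave how many of them are interleaved in memory,
   and spacing the register-number stride (1 for consecutive D registers,
   2 for every other one).  */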
static struct {
    int nregs;
    int interleave;
    int spacing;
} neon_ls_element_type[11] = {
    {4, 4, 1},
    {4, 4, 2},
    {4, 1, 1},
    {4, 2, 1},
    {3, 3, 1},
    {3, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {2, 2, 1},
    {2, 2, 2},
    {2, 1, 1}
};

/* Translate a NEON load/store element instruction.  Return nonzero if the
   instruction is invalid.  */
static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    int rd, rn, rm;
    int op;
    int nregs;
    int interleave;
    int stride;
    int size;
    int reg;
    int pass;
    int load;
    int shift;
    int n;
    TCGv tmp;
    TCGv tmp2;

    if (!vfp_enabled(env))
      return 1;
    VFP_DREG_D(rd, insn);
    rn = (insn >> 16) & 0xf;
    rm = insn & 0xf;
    load = (insn & (1 << 21)) != 0;
    if ((insn & (1 << 23)) == 0) {
        /* Load store all elements.  */
        op = (insn >> 8) & 0xf;
        size = (insn >> 6) & 3;
        if (op > 10 || size == 3)
            return 1;
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        gen_movl_T1_reg(s, rn);
        stride = (1 << size) * interleave;
        for (reg = 0; reg < nregs; reg++) {
            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im((1 << size) * reg);
            } else if (interleave == 2 && nregs == 4 && reg == 2) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im(1 << size);
            }
            for (pass = 0; pass < 2; pass++) {
                if (size == 2) {
                    if (load) {
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        tcg_gen_mov_i32(cpu_T[0], tmp);
                        dead_tmp(tmp);
                        NEON_SET_REG(T0, rd, pass);
                    } else {
                        NEON_GET_REG(T0, rd, pass);
                        tmp = new_tmp();
                        tcg_gen_mov_i32(tmp, cpu_T[0]);
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                    }
                    gen_op_addl_T1_im(stride);
                } else if (size == 1) {
                    if (load) {
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
                        dead_tmp(tmp2);
                        neon_store_reg(rd, pass, tmp);
                    } else {
                        tmp = neon_load_reg(rd, pass);
                        tmp2 = new_tmp();
                        tcg_gen_shri_i32(tmp2, tmp, 16);
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        gen_st16(tmp2, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                    }
                } else /* size == 0 */ {
                    if (load) {
                        for (n = 0; n < 4; n++) {
                            tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                            gen_op_addl_T1_im(stride);
                            if (n == 0) {
                                tmp2 = tmp;
                            } else {
                                gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
                                dead_tmp(tmp);
                            }
                        }
                        neon_store_reg(rd, pass, tmp2);
                    } else {
                        tmp2 = neon_load_reg(rd, pass);
                        for (n = 0; n < 4; n++) {
                            tmp = new_tmp();
                            if (n == 0) {
                                tcg_gen_mov_i32(tmp, tmp2);
                            } else {
                                tcg_gen_shri_i32(tmp, tmp2, n * 8);
                            }
                            gen_st8(tmp, cpu_T[1], IS_USER(s));
                            gen_op_addl_T1_im(stride);
                        }
                        dead_tmp(tmp2);
                    }
                }
            }
            rd += neon_ls_element_type[op].spacing;
        }
        stride = nregs * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {
            /* Load single element to all lanes.  */
            if (!load)
                return 1;
            size = (insn >> 6) & 3;
            nregs = ((insn >> 8) & 3) + 1;
            stride = (insn & (1 << 5)) ? 2 : 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                switch (size) {
                case 0:
                    tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    gen_op_neon_dup_u8(0);
                    break;
                case 1:
                    tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    gen_op_neon_dup_low16();
                    break;
                case 2:
                    tmp = gen_ld32(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    break;
                case 3:
                    return 1;
                }
                gen_op_addl_T1_im(1 << size);
                NEON_SET_REG(T0, rd, 0);
                NEON_SET_REG(T0, rd, 1);
                rd += stride;
            }
            stride = (1 << size) * nregs;
        } else {
            /* Single element.  */
            pass = (insn >> 7) & 1;
            switch (size) {
            case 0:
                shift = ((insn >> 5) & 3) * 8;
                stride = 1;
                break;
            case 1:
                shift = ((insn >> 6) & 1) * 16;
                stride = (insn & (1 << 5)) ? 2 : 1;
                break;
            case 2:
                shift = 0;
                stride = (insn & (1 << 6)) ? 2 : 1;
                break;
            default:
                abort();
            }
            nregs = ((insn >> 8) & 3) + 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                if (load) {
                    switch (size) {
                    case 0:
                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        break;
                    }
                    if (size != 2) {
                        tmp2 = neon_load_reg(rd, pass);
                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
                        dead_tmp(tmp2);
                    }
                    neon_store_reg(rd, pass, tmp);
                } else { /* Store */
                    tmp = neon_load_reg(rd, pass);
                    if (shift)
                        tcg_gen_shri_i32(tmp, tmp, shift);
                    switch (size) {
                    case 0:
                        gen_st8(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                        break;
                    }
                }
                rd += stride;
                gen_op_addl_T1_im(1 << size);
            }
            stride = nregs * (1 << size);
        }
    }
    if (rm != 15) {
        TCGv base;

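        /* Post-index writeback: rm == 15 means no writeback (handled by
           the enclosing test), rm == 13 advances the base by the transfer
           size, and any other rm is added as a register index.  */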
        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            dead_tmp(index);
        }
        store_reg(s, rn, base);
    }
    return 0;
}

/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
{
    tcg_gen_and_i32(t, t, c);
    tcg_gen_bic_i32(f, f, c);
    tcg_gen_or_i32(dest, t, f);
}

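/* gen_neon_bsl above computes dest = (t & c) | (f & ~c); VBSL, VBIT and
   VBIF in the Neon logic-op case below all map onto this primitive with
   the operands permuted appropriately.  */
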
/* Translate a NEON data processing instruction.  Return nonzero if the
   instruction is invalid.
   In general we process vectors in 32-bit chunks.  This means we can reuse
   some of the scalar ops, and hopefully the code generated for 32-bit
   hosts won't be too awful.  The downside is that the few 64-bit operations
   (mainly shifts) get complicated.  */

static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    int op;
    int q;
    int rd, rn, rm;
    int size;
    int shift;
    int pass;
    int count;
    int pairwise;
    int u;
    int n;
    uint32_t imm;
    TCGv tmp;
    TCGv tmp2;
    TCGv tmp3;

    if (!vfp_enabled(env))
      return 1;
    q = (insn & (1 << 6)) != 0;
    u = (insn >> 24) & 1;
    VFP_DREG_D(rd, insn);
    VFP_DREG_N(rn, insn);
    VFP_DREG_M(rm, insn);
    size = (insn >> 20) & 3;
    if ((insn & (1 << 23)) == 0) {
        /* Three register same length.  */
        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
        if (size == 3 && (op == 1 || op == 5 || op == 16)) {
            for (pass = 0; pass < (q ? 2 : 1); pass++) {
                NEON_GET_REG(T0, rm, pass * 2);
                NEON_GET_REG(T1, rm, pass * 2 + 1);
                gen_neon_movl_scratch_T0(0);
                gen_neon_movl_scratch_T1(1);
                NEON_GET_REG(T0, rn, pass * 2);
                NEON_GET_REG(T1, rn, pass * 2 + 1);
                switch (op) {
                case 1: /* VQADD */
                    if (u) {
                        gen_op_neon_addl_saturate_u64();
                    } else {
                        gen_op_neon_addl_saturate_s64();
                    }
                    break;
                case 5: /* VQSUB */
                    if (u) {
                        gen_op_neon_subl_saturate_u64();
                    } else {
                        gen_op_neon_subl_saturate_s64();
                    }
                    break;
                case 16:
                    if (u) {
                        gen_op_neon_subl_u64();
                    } else {
                        gen_op_neon_addl_u64();
                    }
                    break;
                default:
                    abort();
                }
                NEON_SET_REG(T0, rd, pass * 2);
                NEON_SET_REG(T1, rd, pass * 2 + 1);
            }
            return 0;
        }
        switch (op) {
        case 8: /* VSHL */
        case 9: /* VQSHL */
        case 10: /* VRSHL */
        case 11: /* VQRSHL */
            /* Shift operations have Rn and Rm reversed.  */
            {
                int tmp;
                tmp = rn;
                rn = rm;
                rm = tmp;
                pairwise = 0;
            }
            break;
        case 20: /* VPMAX */
        case 21: /* VPMIN */
        case 23: /* VPADD */
            pairwise = 1;
            break;
        case 26: /* VPADD (float) */
            pairwise = (u && size < 2);
            break;
        case 30: /* VPMIN/VPMAX (float) */
            pairwise = u;
            break;
        default:
            pairwise = 0;
            break;
        }
        for (pass = 0; pass < (q ? 4 : 2); pass++) {

        if (pairwise) {
            /* Pairwise.  */
            if (q)
                n = (pass & 1) * 2;
            else
                n = 0;
            if (pass < q + 1) {
                NEON_GET_REG(T0, rn, n);
                NEON_GET_REG(T1, rn, n + 1);
            } else {
                NEON_GET_REG(T0, rm, n);
                NEON_GET_REG(T1, rm, n + 1);
            }
        } else {
            /* Elementwise.  */
            NEON_GET_REG(T0, rn, pass);
            NEON_GET_REG(T1, rm, pass);
        }
        switch (op) {
        case 0: /* VHADD */
            GEN_NEON_INTEGER_OP(hadd);
            break;
        case 1: /* VQADD */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qadd_s8(); break;
            case 1: gen_op_neon_qadd_u8(); break;
            case 2: gen_op_neon_qadd_s16(); break;
            case 3: gen_op_neon_qadd_u16(); break;
            case 4:
                gen_helper_add_saturate(cpu_T[0], cpu_T[0], cpu_T[1]);
                break;
            case 5:
                gen_helper_add_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]);
                break;
            default: abort();
            }
            break;
        case 2: /* VRHADD */
            GEN_NEON_INTEGER_OP(rhadd);
            break;
        case 3: /* Logic ops.  */
            switch ((u << 2) | size) {
            case 0: /* VAND */
                gen_op_andl_T0_T1();
                break;
            case 1: /* BIC */
                gen_op_bicl_T0_T1();
                break;
            case 2: /* VORR */
                gen_op_orl_T0_T1();
                break;
            case 3: /* VORN */
                gen_op_notl_T1();
                gen_op_orl_T0_T1();
                break;
            case 4: /* VEOR */
                gen_op_xorl_T0_T1();
                break;
            case 5: /* VBSL */
                tmp = neon_load_reg(rd, pass);
                gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
                dead_tmp(tmp);
                break;
            case 6: /* VBIT */
                tmp = neon_load_reg(rd, pass);
                gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
                dead_tmp(tmp);
                break;
            case 7: /* VBIF */
                tmp = neon_load_reg(rd, pass);
                gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
                dead_tmp(tmp);
                break;
            }
            break;
        case 4: /* VHSUB */
            GEN_NEON_INTEGER_OP(hsub);
            break;
        case 5: /* VQSUB */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qsub_s8(); break;
            case 1: gen_op_neon_qsub_u8(); break;
            case 2: gen_op_neon_qsub_s16(); break;
            case 3: gen_op_neon_qsub_u16(); break;
            case 4:
                gen_helper_sub_saturate(cpu_T[0], cpu_T[0], cpu_T[1]);
                break;
            case 5:
                gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]);
                break;
            default: abort();
            }
            break;
        case 6: /* VCGT */
            GEN_NEON_INTEGER_OP(cgt);
            break;
        case 7: /* VCGE */
            GEN_NEON_INTEGER_OP(cge);
            break;
        case 8: /* VSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_shl_s8(); break;
            case 1: gen_op_neon_shl_u8(); break;
            case 2: gen_op_neon_shl_s16(); break;
            case 3: gen_op_neon_shl_u16(); break;
            case 4: gen_op_neon_shl_s32(); break;
            case 5: gen_op_neon_shl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_shl_s64(); break;
            case 7: gen_op_neon_shl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VSHL.64 not implemented");
#endif
            }
            break;
        case 9: /* VQSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qshl_s8(); break;
            case 1: gen_op_neon_qshl_u8(); break;
            case 2: gen_op_neon_qshl_s16(); break;
            case 3: gen_op_neon_qshl_u16(); break;
            case 4: gen_op_neon_qshl_s32(); break;
            case 5: gen_op_neon_qshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_qshl_s64(); break;
            case 7: gen_op_neon_qshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VQSHL.64 not implemented");
#endif
            }
            break;
        case 10: /* VRSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_rshl_s8(); break;
            case 1: gen_op_neon_rshl_u8(); break;
            case 2: gen_op_neon_rshl_s16(); break;
            case 3: gen_op_neon_rshl_u16(); break;
            case 4: gen_op_neon_rshl_s32(); break;
            case 5: gen_op_neon_rshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_rshl_s64(); break;
            case 7: gen_op_neon_rshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VRSHL.64 not implemented");
#endif
            }
            break;
        case 11: /* VQRSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qrshl_s8(); break;
            case 1: gen_op_neon_qrshl_u8(); break;
            case 2: gen_op_neon_qrshl_s16(); break;
            case 3: gen_op_neon_qrshl_u16(); break;
            case 4: gen_op_neon_qrshl_s32(); break;
            case 5: gen_op_neon_qrshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_qrshl_s64(); break;
            case 7: gen_op_neon_qrshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VQRSHL.64 not implemented");
#endif
            }
            break;
        case 12: /* VMAX */
            GEN_NEON_INTEGER_OP(max);
            break;
        case 13: /* VMIN */
            GEN_NEON_INTEGER_OP(min);
            break;
        case 14: /* VABD */
            GEN_NEON_INTEGER_OP(abd);
            break;
        case 15: /* VABA */
            GEN_NEON_INTEGER_OP(abd);
            NEON_GET_REG(T1, rd, pass);
            gen_neon_add(size);
            break;
        case 16:
            if (!u) { /* VADD */
                if (gen_neon_add(size))
                    return 1;
            } else { /* VSUB */
                switch (size) {
                case 0: gen_op_neon_sub_u8(); break;
                case 1: gen_op_neon_sub_u16(); break;
                case 2: gen_op_subl_T0_T1(); break;
                default: return 1;
                }
            }
            break;
        case 17:
            if (!u) { /* VTST */
                switch (size) {
                case 0: gen_op_neon_tst_u8(); break;
                case 1: gen_op_neon_tst_u16(); break;
                case 2: gen_op_neon_tst_u32(); break;
                default: return 1;
                }
            } else { /* VCEQ */
                switch (size) {
                case 0: gen_op_neon_ceq_u8(); break;
                case 1: gen_op_neon_ceq_u16(); break;
                case 2: gen_op_neon_ceq_u32(); break;
                default: return 1;
                }
            }
            break;
        case 18: /* Multiply.  */
            switch (size) {
            case 0: gen_op_neon_mul_u8(); break;
            case 1: gen_op_neon_mul_u16(); break;
            case 2: gen_op_mul_T0_T1(); break;
            default: return 1;
            }
            NEON_GET_REG(T1, rd, pass);
            if (u) { /* VMLS */
                switch (size) {
                case 0: gen_op_neon_rsb_u8(); break;
                case 1: gen_op_neon_rsb_u16(); break;
                case 2: gen_op_rsbl_T0_T1(); break;
                default: return 1;
                }
            } else { /* VMLA */
                gen_neon_add(size);
            }
            break;
        case 19: /* VMUL */
            if (u) { /* polynomial */
                gen_op_neon_mul_p8();
            } else { /* Integer */
                switch (size) {
                case 0: gen_op_neon_mul_u8(); break;
                case 1: gen_op_neon_mul_u16(); break;
                case 2: gen_op_mul_T0_T1(); break;
                default: return 1;
                }
            }
            break;
        case 20: /* VPMAX */
            GEN_NEON_INTEGER_OP(pmax);
            break;
        case 21: /* VPMIN */
            GEN_NEON_INTEGER_OP(pmin);
            break;
        case 22: /* Multiply high.  */
            if (!u) { /* VQDMULH */
                switch (size) {
                case 1: gen_op_neon_qdmulh_s16(); break;
                case 2: gen_op_neon_qdmulh_s32(); break;
                default: return 1;
                }
            } else { /* VQRDMULH */
                switch (size) {
                case 1: gen_op_neon_qrdmulh_s16(); break;
                case 2: gen_op_neon_qrdmulh_s32(); break;
                default: return 1;
                }
            }
            break;
        case 23: /* VPADD */
            if (u)
                return 1;
            switch (size) {
            case 0: gen_op_neon_padd_u8(); break;
            case 1: gen_op_neon_padd_u16(); break;
            case 2: gen_op_addl_T0_T1(); break;
            default: return 1;
            }
            break;
        case 26: /* Floating point arithmetic.  */
            switch ((u << 2) | size) {
            case 0: /* VADD */
                gen_op_neon_add_f32();
                break;
            case 2: /* VSUB */
                gen_op_neon_sub_f32();
                break;
            case 4: /* VPADD */
                gen_op_neon_add_f32();
                break;
            case 6: /* VABD */
                gen_op_neon_abd_f32();
                break;
            default:
                return 1;
            }
            break;
        case 27: /* Float multiply.  */
            gen_op_neon_mul_f32();
            if (!u) {
                NEON_GET_REG(T1, rd, pass);
                if (size == 0) {
                    gen_op_neon_add_f32();
                } else {
                    gen_op_neon_rsb_f32();
                }
            }
            break;
        case 28: /* Float compare.  */
            if (!u) {
                gen_op_neon_ceq_f32();
            } else {
                if (size == 0)
                    gen_op_neon_cge_f32();
                else
                    gen_op_neon_cgt_f32();
            }
            break;
        case 29: /* Float compare absolute.  */
            if (!u)
                return 1;
            if (size == 0)
                gen_op_neon_acge_f32();
            else
                gen_op_neon_acgt_f32();
            break;
        case 30: /* Float min/max.  */
            if (size == 0)
                gen_op_neon_max_f32();
            else
                gen_op_neon_min_f32();
            break;
        case 31:
            if (size == 0)
                gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
            else
                gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
            break;
        default:
            abort();
        }
        /* Save the result.  For elementwise operations we can put it
           straight into the destination register.  For pairwise operations
           we have to be careful to avoid clobbering the source operands.  */
        if (pairwise && rd == rm) {
            gen_neon_movl_scratch_T0(pass);
        } else {
            NEON_SET_REG(T0, rd, pass);
        }

        } /* for pass */
        if (pairwise && rd == rm) {
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                gen_neon_movl_T0_scratch(pass);
                NEON_SET_REG(T0, rd, pass);
            }
        }
    } else if (insn & (1 << 4)) {
        if ((insn & 0x00380080) != 0) {
            /* Two registers and shift.  */
            op = (insn >> 8) & 0xf;
            if (insn & (1 << 7)) {
                /* 64-bit shift.   */
                size = 3;
            } else {
                size = 2;
                while ((insn & (1 << (size + 19))) == 0)
                    size--;
            }
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
            /* To avoid excessive duplication of ops we implement shift
               by immediate using the variable shift operations.  */
            if (op < 8) {
                /* Shift by immediate:
                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
                /* Right shifts are encoded as N - shift, where N is the
                   element size in bits.  */
                if (op <= 4)
                    shift = shift - (1 << (size + 3));
                if (size == 3) {
                    count = q + 1;
                } else {
                    count = q ? 4: 2;
                }
                switch (size) {
                case 0:
                    imm = (uint8_t) shift;
                    imm |= imm << 8;
                    imm |= imm << 16;
                    break;
                case 1:
                    imm = (uint16_t) shift;
                    imm |= imm << 16;
                    break;
                case 2:
                case 3:
                    imm = shift;
                    break;
                default:
                    abort();
                }

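                /* Worked example (illustrative): a VSHR.U8 #3 arrives here
                   with shift == -3, so imm == 0xfdfdfdfd: the signed 8-bit
                   count -3 replicated into every byte lane, which the
                   variable-shift helpers treat as a right shift by 3.  */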
                for (pass = 0; pass < count; pass++) {
                    if (size < 3) {
                        /* Operands in T0 and T1.  */
                        gen_op_movl_T1_im(imm);
                        NEON_GET_REG(T0, rm, pass);
                    } else {
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
                        gen_op_movl_T0_im(imm);
                        gen_neon_movl_scratch_T0(0);
                        gen_op_movl_T0_im((int32_t)imm >> 31);
                        gen_neon_movl_scratch_T0(1);
                        NEON_GET_REG(T0, rm, pass * 2);
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
                    }

                    if (gen_neon_shift_im[op][u][size] == NULL)
                        return 1;
                    gen_neon_shift_im[op][u][size]();

                    if (op == 1 || op == 3) {
                        /* Accumulate.  */
                        if (size == 3) {
                            gen_neon_movl_scratch_T0(0);
                            gen_neon_movl_scratch_T1(1);
                            NEON_GET_REG(T0, rd, pass * 2);
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
                            gen_op_neon_addl_u64();
                        } else {
                            NEON_GET_REG(T1, rd, pass);
                            gen_neon_add(size);
                        }
                    } else if (op == 4 || (op == 5 && u)) {
                        /* Insert */
                        if (size == 3) {
                            cpu_abort(env, "VS[LR]I.64 not implemented");
                        }
                        switch (size) {
                        case 0:
                            if (op == 4)
                                imm = 0xff >> -shift;
                            else
                                imm = (uint8_t)(0xff << shift);
                            imm |= imm << 8;
                            imm |= imm << 16;
                            break;
                        case 1:
                            if (op == 4)
                                imm = 0xffff >> -shift;
                            else
                                imm = (uint16_t)(0xffff << shift);
                            imm |= imm << 16;
                            break;
                        case 2:
                            if (op == 4)
                                imm = 0xffffffffu >> -shift;
                            else
                                imm = 0xffffffffu << shift;
                            break;
                        default:
                            abort();
                        }
                        tmp = neon_load_reg(rd, pass);
                        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
                        tcg_gen_andi_i32(tmp, tmp, ~imm);
                        tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
                    }
                    if (size == 3) {
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    } else {
                        NEON_SET_REG(T0, rd, pass);
                    }
                } /* for pass */
            } else if (op < 10) {
                /* Shift by immediate and narrow:
                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
                shift = shift - (1 << (size + 3));
                size++;
                if (size == 3) {
                    count = q + 1;
                } else {
                    count = q ? 4: 2;
                }
                switch (size) {
                case 1:
                    imm = (uint16_t) shift;
                    imm |= imm << 16;
                    break;
                case 2:
                case 3:
                    imm = shift;
                    break;
                default:
                    abort();
                }

                /* Processing MSB first means we need to do less shuffling at
                   the end.  */
                for (pass = count - 1; pass >= 0; pass--) {
                    /* Avoid clobbering the second operand before it has been
                       written.  */
                    n = pass;
                    if (rd == rm)
                        n ^= (count - 1);
                    else
                        n = pass;

                    if (size < 3) {
                        /* Operands in T0 and T1.  */
                        gen_op_movl_T1_im(imm);
                        NEON_GET_REG(T0, rm, n);
                    } else {
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
                        gen_op_movl_T0_im(imm);
                        gen_neon_movl_scratch_T0(0);
                        gen_op_movl_T0_im((int32_t)imm >> 31);
                        gen_neon_movl_scratch_T0(1);
                        NEON_GET_REG(T0, rm, n * 2);
                        NEON_GET_REG(T1, rm, n * 2 + 1);
                    }

                    gen_neon_shift_im_narrow[q][u][size - 1]();

                    if (size < 3 && (pass & 1) == 0) {
                        gen_neon_movl_scratch_T0(0);
                    } else {
                        uint32_t offset;

                        if (size < 3)
                            gen_neon_movl_T1_scratch(0);

                        if (op == 8 && !u) {
                            gen_neon_narrow[size - 1]();
                        } else {
                            if (op == 8)
                                gen_neon_narrow_sats[size - 2]();
                            else
                                gen_neon_narrow_satu[size - 1]();
                        }
                        if (size == 3)
                            offset = neon_reg_offset(rd, n);
                        else
                            offset = neon_reg_offset(rd, n >> 1);
                        gen_op_neon_setreg_T0(offset);
                    }
                } /* for pass */
            } else if (op == 10) {
                /* VSHLL */
                if (q)
                    return 1;
                for (pass = 0; pass < 2; pass++) {
                    /* Avoid clobbering the input operand.  */
                    if (rd == rm)
                        n = 1 - pass;
                    else
                        n = pass;

                    NEON_GET_REG(T0, rm, n);
                    GEN_NEON_INTEGER_OP(widen);
                    if (shift != 0) {
                        /* The shift is less than the width of the source
                           type, so in some cases we can just shift the
                           whole register.  */
                        if (size == 1 || (size == 0 && u)) {
                            gen_op_shll_T0_im(shift);
                            gen_op_shll_T1_im(shift);
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_shll_u16(shift); break;
                            case 2: gen_op_neon_shll_u64(shift); break;
                            default: abort();
                            }
                        }
                    }
                    NEON_SET_REG(T0, rd, n * 2);
                    NEON_SET_REG(T1, rd, n * 2 + 1);
                }
            } else if (op == 15 || op == 16) {
                /* VCVT fixed-point.  */
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                    if (op & 1) {
                        if (u)
                            gen_vfp_ulto(0, shift);
                        else
                            gen_vfp_slto(0, shift);
                    } else {
                        if (u)
                            gen_vfp_toul(0, shift);
                        else
                            gen_vfp_tosl(0, shift);
                    }
                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
                }
            } else {
                return 1;
            }
        } else { /* (insn & 0x00380080) == 0 */
            int invert;

            op = (insn >> 8) & 0xf;
            /* One register and immediate.  */
            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
            invert = (insn & (1 << 5)) != 0;
            switch (op) {
            case 0: case 1:
                /* no-op */
                break;
            case 2: case 3:
                imm <<= 8;
                break;
            case 4: case 5:
                imm <<= 16;
                break;
            case 6: case 7:
                imm <<= 24;
                break;
            case 8: case 9:
                imm |= imm << 16;
                break;
            case 10: case 11:
                imm = (imm << 8) | (imm << 24);
                break;
            case 12:
                imm = (imm << 8) | 0xff;
                break;
            case 13:
                imm = (imm << 16) | 0xffff;
                break;
            case 14:
                imm |= (imm << 8) | (imm << 16) | (imm << 24);
                if (invert)
                    imm = ~imm;
                break;
            case 15:
                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                break;
            }
            if (invert)
                imm = ~imm;

            if (op != 14 || !invert)
                gen_op_movl_T1_im(imm);

            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                if (op & 1 && op < 12) {
                    NEON_GET_REG(T0, rd, pass);
                    if (invert) {
                        /* The immediate value has already been inverted, so
                           BIC becomes AND.  */
                        gen_op_andl_T0_T1();
                    } else {
                        gen_op_orl_T0_T1();
                    }
                    NEON_SET_REG(T0, rd, pass);
                } else {
                    if (op == 14 && invert) {
                        uint32_t tmp;
                        tmp = 0;
                        for (n = 0; n < 4; n++) {
                            if (imm & (1 << (n + (pass & 1) * 4)))
                                tmp |= 0xff << (n * 8);
                        }
                        gen_op_movl_T1_im(tmp);
                    }
                    /* VMOV, VMVN.  */
                    NEON_SET_REG(T1, rd, pass);
                }
            }
        }
    } else { /* (insn & 0x00800010 == 0x00800010) */
        if (size != 3) {
            op = (insn >> 8) & 0xf;
            if ((insn & (1 << 6)) == 0) {
                /* Three registers of different lengths.  */
                int src1_wide;
                int src2_wide;
                int prewiden;
                /* prewiden, src1_wide, src2_wide */
                static const int neon_3reg_wide[16][3] = {
                    {1, 0, 0}, /* VADDL */
                    {1, 1, 0}, /* VADDW */
                    {1, 0, 0}, /* VSUBL */
                    {1, 1, 0}, /* VSUBW */
                    {0, 1, 1}, /* VADDHN */
                    {0, 0, 0}, /* VABAL */
                    {0, 1, 1}, /* VSUBHN */
                    {0, 0, 0}, /* VABDL */
                    {0, 0, 0}, /* VMLAL */
                    {0, 0, 0}, /* VQDMLAL */
                    {0, 0, 0}, /* VMLSL */
                    {0, 0, 0}, /* VQDMLSL */
                    {0, 0, 0}, /* Integer VMULL */
                    {0, 0, 0}, /* VQDMULL */
                    {0, 0, 0}  /* Polynomial VMULL */
                };

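                /* prewiden: narrow source elements are widened to the
                   double-width type before the op; srcN_wide: that operand
                   is already a full-width (wide) vector and is used
                   as-is.  */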
                prewiden = neon_3reg_wide[op][0];
4513
                src1_wide = neon_3reg_wide[op][1];
4514
                src2_wide = neon_3reg_wide[op][2];
4515

    
4516
                /* Avoid overlapping operands.  Wide source operands are
4517
                   always aligned so will never overlap with wide
4518
                   destinations in problematic ways.  */
4519
                if (rd == rm && !src2_wide) {
4520
                    NEON_GET_REG(T0, rm, 1);
4521
                    gen_neon_movl_scratch_T0(2);
4522
                } else if (rd == rn && !src1_wide) {
4523
                    NEON_GET_REG(T0, rn, 1);
4524
                    gen_neon_movl_scratch_T0(2);
4525
                }
4526
                for (pass = 0; pass < 2; pass++) {
4527
                    /* Load the second operand into env->vfp.scratch.
4528
                       Also widen narrow operands.  */
4529
                    if (src2_wide) {
4530
                        NEON_GET_REG(T0, rm, pass * 2);
4531
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
4532
                    } else {
4533
                        if (pass == 1 && rd == rm) {
4534
                            if (prewiden) {
4535
                                gen_neon_movl_T0_scratch(2);
4536
                            } else {
4537
                                gen_neon_movl_T1_scratch(2);
4538
                            }
4539
                        } else {
4540
                            if (prewiden) {
4541
                                NEON_GET_REG(T0, rm, pass);
4542
                            } else {
4543
                                NEON_GET_REG(T1, rm, pass);
4544
                            }
4545
                        }
4546
                    }
4547
                    if (prewiden && !src2_wide) {
4548
                        GEN_NEON_INTEGER_OP(widen);
4549
                    }
4550
                    if (prewiden || src2_wide) {
4551
                        gen_neon_movl_scratch_T0(0);
4552
                        gen_neon_movl_scratch_T1(1);
4553
                    }
4554

    
4555
                    /* Load the first operand.  */
4556
                    if (src1_wide) {
4557
                        NEON_GET_REG(T0, rn, pass * 2);
4558
                        NEON_GET_REG(T1, rn, pass * 2 + 1);
4559
                    } else {
4560
                        if (pass == 1 && rd == rn) {
4561
                            gen_neon_movl_T0_scratch(2);
4562
                        } else {
4563
                            NEON_GET_REG(T0, rn, pass);
4564
                        }
4565
                    }
4566
                    if (prewiden && !src1_wide) {
4567
                        GEN_NEON_INTEGER_OP(widen);
4568
                    }
4569
                    switch (op) {
                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
                        switch (size) {
                        case 0: gen_op_neon_addl_u16(); break;
                        case 1: gen_op_neon_addl_u32(); break;
                        case 2: gen_op_neon_addl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
                        switch (size) {
                        case 0: gen_op_neon_subl_u16(); break;
                        case 1: gen_op_neon_subl_u32(); break;
                        case 2: gen_op_neon_subl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 5: case 7: /* VABAL, VABDL */
                        switch ((size << 1) | u) {
                        case 0: gen_op_neon_abdl_s16(); break;
                        case 1: gen_op_neon_abdl_u16(); break;
                        case 2: gen_op_neon_abdl_s32(); break;
                        case 3: gen_op_neon_abdl_u32(); break;
                        case 4: gen_op_neon_abdl_s64(); break;
                        case 5: gen_op_neon_abdl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 8: case 9: case 10: case 11: case 12: case 13:
                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                        switch ((size << 1) | u) {
                        case 0: gen_op_neon_mull_s8(); break;
                        case 1: gen_op_neon_mull_u8(); break;
                        case 2: gen_op_neon_mull_s16(); break;
                        case 3: gen_op_neon_mull_u16(); break;
                        case 4: gen_op_imull_T0_T1(); break;
                        case 5: gen_op_mull_T0_T1(); break;
                        default: abort();
                        }
                        break;
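                    /* Polynomial multiply is a carry-less multiply over
                       GF(2); no helper exists for it here yet.  */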
                    case 14: /* Polynomial VMULL */
                        cpu_abort(env, "Polynomial VMULL not implemented");

                    default: /* 15 is RESERVED.  */
                        return 1;
                    }
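                    /* Second stage: ops 5 and 8-11 fold the product into
                       the destination; op 13 (VQDMULL) shares this path
                       so the saturating double can be applied.  */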
                    if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                        /* Accumulate.  */
                        if (op == 10 || op == 11) {
                            switch (size) {
                            case 0: gen_op_neon_negl_u16(); break;
                            case 1: gen_op_neon_negl_u32(); break;
                            case 2: gen_op_neon_negl_u64(); break;
                            default: abort();
                            }
                        }

                        gen_neon_movl_scratch_T0(0);
                        gen_neon_movl_scratch_T1(1);

                        if (op != 13) {
                            NEON_GET_REG(T0, rd, pass * 2);
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
                        }

                        switch (op) {
                        case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
                            switch (size) {
                            case 0: gen_op_neon_addl_u16(); break;
                            case 1: gen_op_neon_addl_u32(); break;
                            case 2: gen_op_neon_addl_u64(); break;
                            default: abort();
                            }
                            break;
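                        /* For VQDMLAL/VQDMLSL the (possibly negated)
                           product sits in scratch while T0:T1 hold the
                           accumulator: the first saturating add plus the
                           fall-through add give acc +/- 2 * product.
                           VQDMULL enters at the second add only, doubling
                           the bare product.  */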
                        case 9: case 11: /* VQDMLAL, VQDMLSL */
                            switch (size) {
                            case 1: gen_op_neon_addl_saturate_s32(); break;
                            case 2: gen_op_neon_addl_saturate_s64(); break;
                            default: abort();
                            }
                            /* Fall through.  */
                        case 13: /* VQDMULL */
                            switch (size) {
                            case 1: gen_op_neon_addl_saturate_s32(); break;
                            case 2: gen_op_neon_addl_saturate_s64(); break;
                            default: abort();
                            }
                            break;
                        default:
                            abort();
                        }
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    } else if (op == 4 || op == 6) {
                        /* Narrowing operation.  The U bit selects the
                           rounding variants (VRADDHN, VRSUBHN).  */
                        if (u) {
                            switch (size) {
                            case 0: gen_op_neon_narrow_high_round_u8(); break;
                            case 1: gen_op_neon_narrow_high_round_u16(); break;
                            case 2: gen_op_neon_narrow_high_round_u32(); break;
                            default: abort();
                            }
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_narrow_high_u8(); break;
                            case 1: gen_op_neon_narrow_high_u16(); break;
                            case 2: gen_op_movl_T0_T1(); break;
                            default: abort();
                            }
                        }
                        NEON_SET_REG(T0, rd, pass);
                    } else {
                        /* Write back the result.  */
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    }
                }
            } else {
                /* Two registers and a scalar.  */
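                /* One source operand is a single element of rm,
                   applied to every element of rn.  */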
                switch (op) {
                case 0: /* Integer VMLA scalar */
                case 1: /* Float VMLA scalar */
                case 4: /* Integer VMLS scalar */
                case 5: /* Floating point VMLS scalar */
                case 8: /* Integer VMUL scalar */
                case 9: /* Floating point VMUL scalar */
                case 12: /* VQDMULH scalar */
                case 13: /* VQRDMULH scalar */
                    gen_neon_get_scalar(size, rm);
                    gen_neon_movl_scratch_T0(0);
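                    /* The scalar is kept in scratch reg 0 and reloaded
                       into T0 on every pass after the first, since the
                       multiply below clobbers T0.  */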
                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
                        if (pass != 0)
                            gen_neon_movl_T0_scratch(0);
                        NEON_GET_REG(T1, rn, pass);
                        if (op == 12) {
                            if (size == 1) {
                                gen_op_neon_qdmulh_s16();
                            } else {
                                gen_op_neon_qdmulh_s32();
                            }
                        } else if (op == 13) {
                            if (size == 1) {
                                gen_op_neon_qrdmulh_s16();
                            } else {
                                gen_op_neon_qrdmulh_s32();
                            }
                        } else if (op & 1) {
                            gen_op_neon_mul_f32();
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_mul_u8(); break;
                            case 1: gen_op_neon_mul_u16(); break;
                            case 2: gen_op_mul_T0_T1(); break;
                            default: return 1;
                            }
                        }
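                        /* Ops below 8 accumulate the product into the
                           destination (VMLA/VMLS); the others overwrite
                           it.  */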
                        if (op < 8) {
                            /* Accumulate.  */
                            NEON_GET_REG(T1, rd, pass);
                            switch (op) {
                            case 0:
                                gen_neon_add(size);
                                break;
                            case 1:
                                gen_op_neon_add_f32();
                                break;
                            case 4:
                                switch (size) {