/* target-arm/translate.c @ 4373f3ce */

/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
#include "disas.h"
#include "tcg-op.h"

#define GEN_HELPER 1
#include "helpers.h"

#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_feature(env, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)

#define ARCH(x) if (!ENABLE_ARCH_##x) goto illegal_op;

/* internal defines */
typedef struct DisasContext {
    target_ulong pc;
    int is_jmp;
    /* Nonzero if this instruction has been conditionally skipped.  */
    int condjmp;
    /* The label that will be jumped to when the instruction is skipped.  */
    int condlabel;
    /* Thumb-2 conditional execution bits.  */
    int condexec_mask;
    int condexec_cond;
    struct TranslationBlock *tb;
    int singlestep_enabled;
    int thumb;
    int is_mem;
#if !defined(CONFIG_USER_ONLY)
    int user;
#endif
} DisasContext;

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

/* These instructions trap after executing, so defer them until after the
   conditional execution state has been updated.  */
#define DISAS_WFI 4
#define DISAS_SWI 5

/* XXX: move that elsewhere */
extern FILE *logfile;
extern int loglevel;

static TCGv cpu_env;
/* FIXME:  These should be removed.  */
static TCGv cpu_T[3];
static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");

    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
    cpu_T[2] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG3, "T2");
}
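
/* Presumably TCG_AREG0..TCG_AREG3 name fixed host registers reserved by TCG,
   so "env" and the legacy T0-T2 shadows stay in host registers across
   generated code -- a hold-over from the pre-TCG dyngen register layout.  */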

/* The code generator doesn't like lots of temporaries, so maintain our own
   cache for reuse within a function.  */
#define MAX_TEMPS 8
static int num_temps;
static TCGv temps[MAX_TEMPS];

/* Allocate a temporary variable.  */
static TCGv new_tmp(void)
{
    TCGv tmp;
    if (num_temps == MAX_TEMPS)
        abort();

    if (GET_TCGV(temps[num_temps]))
        return temps[num_temps++];

    tmp = tcg_temp_new(TCG_TYPE_I32);
    temps[num_temps++] = tmp;
    return tmp;
}

/* Release a temporary variable.  */
static void dead_tmp(TCGv tmp)
{
    int i;
    num_temps--;
    i = num_temps;
    if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
        return;

    /* Shuffle this temp to the last slot.  */
    while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
        i--;
    while (i < num_temps) {
        temps[i] = temps[i + 1];
        i++;
    }
    temps[i] = tmp;
}
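
/* The cache behaves like a stack: temps[0..num_temps-1] are live.  dead_tmp
   supports release in any order by shuffling the freed temp up to the top
   slot, where the next new_tmp call will hand it out again.  */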

static inline TCGv load_cpu_offset(int offset)
{
    TCGv tmp = new_tmp();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))

static inline void store_cpu_offset(TCGv var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    dead_tmp(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUState, name))

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv load_reg(DisasContext *s, int reg)
{
    TCGv tmp = new_tmp();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv var)
{
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    dead_tmp(var);
}
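
/* Reading r15 yields the architectural PC, which runs ahead of the current
   instruction; writing r15 clears bit 0 and ends the TB with DISAS_JUMP so
   execution resumes at the new PC.  */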


/* Basic operations.  */
#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
#define gen_op_movl_T0_T2() tcg_gen_mov_i32(cpu_T[0], cpu_T[2])
#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
#define gen_op_movl_T1_T2() tcg_gen_mov_i32(cpu_T[1], cpu_T[2])
#define gen_op_movl_T2_T0() tcg_gen_mov_i32(cpu_T[2], cpu_T[0])
#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
#define gen_op_movl_T2_im(im) tcg_gen_movi_i32(cpu_T[2], im)

#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])

#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);

#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_andi_i32(var, var, 0xff)
#define gen_uxth(var) tcg_gen_andi_i32(var, var, 0xffff)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)

#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])

#define gen_op_addl_T0_T1_setq() \
    gen_helper_add_setq(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_addl_T0_T1_saturate() \
    gen_helper_add_saturate(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1_saturate() \
    gen_helper_sub_saturate(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_addl_T0_T1_usaturate() \
    gen_helper_add_usaturate(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1_usaturate() \
    gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1])

/* Copy the most significant bit of T0 to all bits of T1.  */
#define gen_op_signbit_T1_T0() tcg_gen_sari_i32(cpu_T[1], cpu_T[0], 31)

#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception(int excp)
{
    TCGv tmp = new_tmp();
    tcg_gen_movi_i32(tmp, excp);
    gen_helper_exception(tmp);
    dead_tmp(tmp);
}

static void gen_smul_dual(TCGv a, TCGv b)
{
    TCGv tmp1 = new_tmp();
    TCGv tmp2 = new_tmp();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    dead_tmp(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    dead_tmp(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff);
    tcg_gen_ext8s_i32(var, var);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv var, int shift, uint32_t mask)
{
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
static void gen_sbfx(TCGv var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}
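
/* The xor/sub pair above is the usual branch-free sign extension: for an
   8-bit field, 0x80 ^ 0x80 = 0x00 and 0x00 - 0x80 = 0xffffff80, while
   0x7f ^ 0x80 = 0xff and 0xff - 0x80 = 0x7f, so the sign bit propagates.  */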

/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
{
    tcg_gen_shli_i32(val, val, shift);
    tcg_gen_andi_i32(val, val, mask);
    tcg_gen_andi_i32(base, base, ~mask);
    tcg_gen_or_i32(dest, base, val);
}

/* Round the top 32 bits of a 64-bit value.  */
static void gen_roundqd(TCGv a, TCGv b)
{
    tcg_gen_shri_i32(a, a, 31);
    tcg_gen_add_i32(a, a, b);
}
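
/* i.e. a = (a >> 31) + b: bit 31 of the low word is the 0.5 bit of the
   64-bit value, so adding it to the high word implements round-half-up.  */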

/* FIXME: Most targets have native widening multiplication.
   It would be good to use that instead of a full wide multiply.  */
/* Unsigned 32x32->64 multiply.  */
static void gen_op_mull_T0_T1(void)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
    tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
}

/* Signed 32x32->64 multiply.  */
static void gen_imull(TCGv a, TCGv b)
{
    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);

    tcg_gen_ext_i32_i64(tmp1, a);
    tcg_gen_ext_i32_i64(tmp2, b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(a, tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(b, tmp1);
}
#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])

/* Signed 32x16 multiply, top 32 bits.  */
static void gen_imulw(TCGv a, TCGv b)
{
    gen_imull(a, b);
    tcg_gen_shri_i32(a, a, 16);
    tcg_gen_shli_i32(b, b, 16);
    tcg_gen_or_i32(a, a, b);
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv t0, TCGv t1)
{
    TCGv tmp = new_tmp();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    dead_tmp(tmp);
    dead_tmp(t1);
}

#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 31);
    gen_set_CF(tmp);
    dead_tmp(tmp);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NZF));
}

/* T0 += T1 + CF.  */
static void gen_adc_T0_T1(void)
{
    TCGv tmp;
    gen_op_addl_T0_T1();
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
    dead_tmp(tmp);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp;
    tcg_gen_sub_i32(dest, t0, t1);
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(dest, dest, tmp);
    tcg_gen_subi_i32(dest, dest, 1);
    dead_tmp(tmp);
}

#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])

/* FIXME:  Implement this natively.  */
static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
{
    tcg_gen_xori_i32(t0, t1, ~0);
}

/* T0 &= ~T1.  Clobbers T1.  */
/* FIXME: Implement bic natively.  */
static inline void gen_op_bicl_T0_T1(void)
{
    gen_op_notl_T1();
    gen_op_andl_T0_T1();
}

/* FIXME:  Implement this natively.  */
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
{
    TCGv tmp;

    if (i == 0)
        return;

    tmp = new_tmp();
    tcg_gen_shri_i32(tmp, t1, i);
    tcg_gen_shli_i32(t1, t1, 32 - i);
    tcg_gen_or_i32(t0, t1, tmp);
    dead_tmp(tmp);
}
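
/* Rotate right by i is composed as (t1 >> i) | (t1 << (32 - i)); the i == 0
   early return avoids the undefined 32-bit shift in the second half.  */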

static void shifter_out_im(TCGv var, int shift)
{
    TCGv tmp = new_tmp();
    if (shift == 0) {
        tcg_gen_andi_i32(tmp, var, 1);
    } else {
        tcg_gen_shri_i32(tmp, var, shift);
        if (shift != 31)
            tcg_gen_andi_i32(tmp, tmp, 1);
    }
    gen_set_CF(tmp);
    dead_tmp(tmp);
}
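
/* shifter_out_im computes the shifter carry-out, i.e. bit "shift" of var,
   and stores it in CF; after a right shift by 31 the value is already
   0 or 1, so the masking can be skipped.  */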

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(var, var, 31);
                gen_set_CF(var);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rori_i32(var, var, shift);
        } else {
            TCGv tmp = load_cpu_field(CF);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_shli_i32(tmp, tmp, 31);
            tcg_gen_or_i32(var, var, tmp);
            dead_tmp(tmp);
        }
    }
}
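
/* In the shiftop == 3 encoding a zero rotate amount means RRX: a one-bit
   rotate through carry, shifting the old CF value into bit 31.  */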

#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 5:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
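
/* Only the plain signed (s) and unsigned (u) variants set the GE flags, so
   only those helpers take the extra pointer to env->GE; the saturating and
   halving forms (q, sh, uq, uh) do not.  */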

/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 4:
        tmp = tcg_temp_new(TCG_TYPE_PTR);
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

static void gen_test_cc(int cc, int label)
{
    TCGv tmp;
    TCGv tmp2;
    TCGv zero;
    int inv;

    zero = tcg_const_i32(0);
    switch (cc) {
    case 0: /* eq: Z */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 1: /* ne: !Z */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        break;
    case 2: /* cs: C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        break;
    case 3: /* cc: !C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 4: /* mi: N */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 5: /* pl: !N */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 6: /* vs: V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 7: /* vc: !V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 8: /* hi: C && !Z */
        inv = gen_new_label();
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tmp = load_cpu_field(CF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tmp = load_cpu_field(NZF);
        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NZF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
    dead_tmp(tmp);
}
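
/* N and Z share the NZF field: N is the sign bit of NZF and Z is "NZF == 0",
   which is why eq/ne test the whole word against zero while mi/pl test its
   sign.  The signed ge/lt conditions test N ^ V via the sign of VF ^ NZF.  */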

const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* tst */
    1, /* teq */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};

static GenOpFunc *gen_shift_T1_T0[4] = {
    gen_op_shll_T1_T0,
    gen_op_shrl_T1_T0,
    gen_op_sarl_T1_T0,
    gen_op_rorl_T1_T0,
};

static GenOpFunc *gen_shift_T1_T0_cc[4] = {
    gen_op_shll_T1_T0_cc,
    gen_op_shrl_T1_T0_cc,
    gen_op_sarl_T1_T0_cc,
    gen_op_rorl_T1_T0_cc,
};

/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    if (s->thumb != (addr & 1)) {
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
    }
    tcg_gen_movi_i32(tmp, addr & ~1);
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
    dead_tmp(tmp);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv var)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    tcg_gen_andi_i32(tmp, var, 1);
    store_cpu_field(tmp, thumb);
    tcg_gen_andi_i32(var, var, ~1);
    store_cpu_field(var, regs[15]);
}

/* TODO: This should be removed.  Use gen_bx instead.  */
static inline void gen_bx_T0(DisasContext *s)
{
    TCGv tmp = new_tmp();
    tcg_gen_mov_i32(tmp, cpu_T[0]);
    gen_bx(s, tmp);
}

#if defined(CONFIG_USER_ONLY)
#define gen_ldst(name, s) gen_op_##name##_raw()
#else
#define gen_ldst(name, s) do { \
    s->is_mem = 1; \
    if (IS_USER(s)) \
        gen_op_##name##_user(); \
    else \
        gen_op_##name##_kernel(); \
    } while (0)
#endif
static inline TCGv gen_ld8s(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld8s(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld8u(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld8u(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld16s(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld16s(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld16u(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld16u(tmp, addr, index);
    return tmp;
}
static inline TCGv gen_ld32(TCGv addr, int index)
{
    TCGv tmp = new_tmp();
    tcg_gen_qemu_ld32u(tmp, addr, index);
    return tmp;
}
static inline void gen_st8(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st8(val, addr, index);
    dead_tmp(val);
}
static inline void gen_st16(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st16(val, addr, index);
    dead_tmp(val);
}
static inline void gen_st32(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st32(val, addr, index);
    dead_tmp(val);
}
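
/* The index argument to the qemu_ld/st ops is the MMU mode; callers pass
   IS_USER(s) so user-mode translations use the user memory index.  */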

static inline void gen_movl_T0_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[0], reg);
}

static inline void gen_movl_T1_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[1], reg);
}

static inline void gen_movl_T2_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[2], reg);
}

static inline void gen_set_pc_T0(void)
{
    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
}

static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
{
    TCGv tmp;
    if (reg == 15) {
        tmp = new_tmp();
        tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
    } else {
        tmp = cpu_T[t];
    }
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
    if (reg == 15) {
        dead_tmp(tmp);
        s->is_jmp = DISAS_JUMP;
    }
}

static inline void gen_movl_reg_T0(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 0);
}

static inline void gen_movl_reg_T1(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 1);
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    gen_op_movl_T0_im(s->pc);
    gen_movl_reg_T0(s, 15);
    s->is_jmp = DISAS_UPDATE;
}

static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv var)
{
    int val, rm, shift, shiftop;
    TCGv offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}

static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv var)
{
    int val, rm;
    TCGv offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}
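
/* Both helpers decode the ARM addressing-mode fields: bit 25 (bit 22 for
   the halfword forms) selects immediate vs. register offset, and bit 23 is
   the U bit choosing whether the offset is added or subtracted.  */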

#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    if (dp)                                                           \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \
    else                                                              \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
}

#define VFP_OP1i(name)                               \
static inline void gen_vfp_##name(int dp, int arg)  \
{                                                   \
    if (dp)                                         \
        gen_op_vfp_##name##d(arg);                  \
    else                                            \
        gen_op_vfp_##name##s(arg);                  \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

static inline void gen_vfp_uito(int dp)
{
    if (dp)
        gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_sito(int dp)
{
    if (dp)
        gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_toui(int dp)
{
    if (dp)
        gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_touiz(int dp)
{
    if (dp)
        gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_tosi(int dp)
{
    if (dp)
        gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_tosiz(int dp)
{
    if (dp)
        gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
}

#define VFP_GEN_FIX(name) \
static inline void gen_vfp_##name(int dp, int shift) \
{ \
    if (dp) \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\
    else \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\
}
VFP_GEN_FIX(tosh)
VFP_GEN_FIX(tosl)
VFP_GEN_FIX(touh)
VFP_GEN_FIX(toul)
VFP_GEN_FIX(shto)
VFP_GEN_FIX(slto)
VFP_GEN_FIX(uhto)
VFP_GEN_FIX(ulto)
#undef VFP_GEN_FIX

static inline void gen_vfp_ld(DisasContext *s, int dp)
{
    if (dp)
        tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s));
    else
        tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s));
}

static inline void gen_vfp_st(DisasContext *s, int dp)
{
    if (dp)
        tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s));
    else
        tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s));
}

static inline long
vfp_reg_offset (int dp, int reg)
{
    if (dp)
        return offsetof(CPUARMState, vfp.regs[reg]);
    else if (reg & 1) {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.upper);
    } else {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.lower);
    }
}
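
/* The VFP register file is stored as an array of doubles; a single-precision
   register sN is the upper or lower 32-bit half of vfp.regs[N >> 1],
   selected by the low bit of the register number.  */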

/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}

#define NEON_GET_REG(T, reg, n) gen_op_neon_getreg_##T(neon_reg_offset(reg, n))
#define NEON_SET_REG(T, reg, n) gen_op_neon_setreg_##T(neon_reg_offset(reg, n))

#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64

static inline void gen_mov_F0_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_F1_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_vreg_F0(int dp, int reg)
{
    if (dp)
        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

#define ARM_CP_RW_BIT        (1 << 20)

static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
{
    int rd;
    uint32_t offset;

    rd = (insn >> 16) & 0xf;
    gen_movl_T1_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            gen_op_addl_T1_im(offset);
        else
            gen_op_addl_T1_im(-offset);

        if (insn & (1 << 21))
            gen_movl_reg_T1(s, rd);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        if (insn & (1 << 23))
            gen_op_movl_T0_im(offset);
        else
            gen_op_movl_T0_im(- offset);
        gen_op_addl_T0_T1();
        gen_movl_reg_T0(s, rd);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
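
/* Leaves the effective address in T1.  Bit 24 selects pre- vs. post-indexed
   addressing, bit 21 requests base-register writeback, and bit 23 gives the
   offset sign; the unindexed form is only valid with the U bit (23) set.  */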

static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
{
    int rd = (insn >> 0) & 0xf;

    if (insn & (1 << 8))
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
            return 1;
        else
            gen_op_iwmmxt_movl_T0_wCx(rd);
    else
        gen_op_iwmmxt_movl_T0_T1_wRn(rd);

    gen_op_movl_T1_im(mask);
    gen_op_andl_T0_T1();
    return 0;
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv tmp;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                        /* TMRRC */
                gen_op_iwmmxt_movl_T0_T1_wRn(wrd);
                gen_movl_reg_T0(s, rdlo);
                gen_movl_reg_T1(s, rdhi);
            } else {                                        /* TMCRR */
                gen_movl_T0_reg(s, rdlo);
                gen_movl_T1_reg(s, rdhi);
                gen_op_iwmmxt_movl_wRn_T0_T1(wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        if (gen_iwmmxt_address(s, insn))
            return 1;
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                        /* WLDRW wCx */
                tmp = gen_ld32(cpu_T[1], IS_USER(s));
                tcg_gen_mov_i32(cpu_T[0], tmp);
                dead_tmp(tmp);
                gen_op_iwmmxt_movl_wCx_T0(wrd);
            } else {
                if (insn & (1 << 8))
                    if (insn & (1 << 22))                /* WLDRD */
                        gen_ldst(iwmmxt_ldq, s);
                    else                                /* WLDRW wRd */
                        gen_ldst(iwmmxt_ldl, s);
                else
                    if (insn & (1 << 22))                /* WLDRH */
                        gen_ldst(iwmmxt_ldw, s);
                    else                                /* WLDRB */
                        gen_ldst(iwmmxt_ldb, s);
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                        /* WSTRW wCx */
                gen_op_iwmmxt_movl_T0_wCx(wrd);
                tmp = new_tmp();
                tcg_gen_mov_i32(tmp, cpu_T[0]);
                gen_st32(tmp, cpu_T[1], IS_USER(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                if (insn & (1 << 8))
                    if (insn & (1 << 22))                /* WSTRD */
                        gen_ldst(iwmmxt_stq, s);
                    else                                /* WSTRW wRd */
                        gen_ldst(iwmmxt_stl, s);
                else
                    if (insn & (1 << 22))                /* WSTRH */
                        gen_ldst(iwmmxt_stw, s);
                    else                                /* WSTRB */
                        gen_ldst(iwmmxt_stb, s);
            }
        }
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
    case 0x000:                                                /* WOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_orq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x011:                                                /* TMCR */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        switch (wrd) {
        case ARM_IWMMXT_wCID:
        case ARM_IWMMXT_wCASF:
            break;
        case ARM_IWMMXT_wCon:
            gen_op_iwmmxt_set_cup();
            /* Fall through.  */
        case ARM_IWMMXT_wCSSF:
            gen_op_iwmmxt_movl_T0_wCx(wrd);
            gen_movl_T1_reg(s, rd);
            gen_op_bicl_T0_T1();
            gen_op_iwmmxt_movl_wCx_T0(wrd);
            break;
        case ARM_IWMMXT_wCGR0:
        case ARM_IWMMXT_wCGR1:
        case ARM_IWMMXT_wCGR2:
        case ARM_IWMMXT_wCGR3:
            gen_op_iwmmxt_set_cup();
            gen_movl_T0_reg(s, rd);
            gen_op_iwmmxt_movl_wCx_T0(wrd);
            break;
        default:
            return 1;
        }
        break;
    case 0x100:                                                /* WXOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x111:                                                /* TMRC */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movl_T0_wCx(wrd);
        gen_movl_reg_T0(s, rd);
        break;
    case 0x300:                                                /* WANDN */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_negq_M0();
        gen_op_iwmmxt_andq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x200:                                                /* WAND */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_andq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x810: case 0xa10:                                /* WMADD */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
        else
            gen_op_iwmmxt_madduq_M0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:        /* WUNPCKIL */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:        /* WUNPCKIH */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x012: case 0x112: case 0x412: case 0x512:        /* WSAD */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 22))
            gen_op_iwmmxt_sadw_M0_wRn(rd1);
        else
            gen_op_iwmmxt_sadb_M0_wRn(rd1);
        if (!(insn & (1 << 20)))
            gen_op_iwmmxt_addl_M0_wRn(wrd);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x010: case 0x110: case 0x210: case 0x310:        /* WMUL */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_mulsw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
        else
            gen_op_iwmmxt_muluw_M0_wRn(rd1, (insn & (1 << 20)) ? 16 : 0);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x410: case 0x510: case 0x610: case 0x710:        /* WMAC */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 21))
            gen_op_iwmmxt_macsw_M0_wRn(rd1);
        else
            gen_op_iwmmxt_macuw_M0_wRn(rd1);
        if (!(insn & (1 << 20))) {
            if (insn & (1 << 21))
                gen_op_iwmmxt_addsq_M0_wRn(wrd);
            else
                gen_op_iwmmxt_adduq_M0_wRn(wrd);
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x006: case 0x406: case 0x806: case 0xc06:        /* WCMPEQ */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
            break;
        case 1:
            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
            break;
        case 2:
            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x800: case 0x900: case 0xc00: case 0xd00:        /* WAVG2 */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (insn & (1 << 22))
            gen_op_iwmmxt_avgw_M0_wRn(rd1, (insn >> 20) & 1);
        else
            gen_op_iwmmxt_avgb_M0_wRn(rd1, (insn >> 20) & 1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x802: case 0x902: case 0xa02: case 0xb02:        /* WALIGNR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
        gen_op_movl_T1_im(7);
        gen_op_andl_T0_T1();
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x601: case 0x605: case 0x609: case 0x60d:        /* TINSR */
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        gen_movl_T0_reg(s, rd);
        gen_op_iwmmxt_movq_M0_wRn(wrd);
        switch ((insn >> 6) & 3) {
        case 0:
            gen_op_movl_T1_im(0xff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
            break;
        case 1:
            gen_op_movl_T1_im(0xffff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
            break;
        case 2:
            gen_op_movl_T1_im(0xffffffff);
            gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x107: case 0x507: case 0x907: case 0xd07:        /* TEXTRM */
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        if (rd == 15)
            return 1;
        gen_op_iwmmxt_movq_M0_wRn(wrd);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & 8)
                gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
            else {
                gen_op_movl_T1_im(0xff);
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 7) << 3);
            }
            break;
        case 1:
            if (insn & 8)
                gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
            else {
                gen_op_movl_T1_im(0xffff);
                gen_op_iwmmxt_extru_T0_M0_T1((insn & 3) << 4);
            }
            break;
        case 2:
            gen_op_movl_T1_im(0xffffffff);
            gen_op_iwmmxt_extru_T0_M0_T1((insn & 1) << 5);
            break;
        case 3:
            return 1;
        }
        gen_movl_reg_T0(s, rd);
        break;
    case 0x117: case 0x517: case 0x917: case 0xd17:        /* TEXTRC */
        if ((insn & 0x000ff008) != 0x0003f000)
            return 1;
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
            break;
        case 1:
            gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
            break;
        case 2:
            gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
            break;
        case 3:
            return 1;
        }
        gen_op_shll_T1_im(28);
        gen_set_nzcv(cpu_T[1]);
        break;
1605
    case 0x401: case 0x405: case 0x409: case 0x40d:        /* TBCST */
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        gen_movl_T0_reg(s, rd);
        switch ((insn >> 6) & 3) {
        case 0:
            gen_op_iwmmxt_bcstb_M0_T0();
            break;
        case 1:
            gen_op_iwmmxt_bcstw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_bcstl_M0_T0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x113: case 0x513: case 0x913: case 0xd13:        /* TANDC */
        if ((insn & 0x000ff00f) != 0x0003f000)
            return 1;
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
        switch ((insn >> 22) & 3) {
        case 0:
            for (i = 0; i < 7; i ++) {
                gen_op_shll_T1_im(4);
                gen_op_andl_T0_T1();
            }
            break;
        case 1:
            for (i = 0; i < 3; i ++) {
                gen_op_shll_T1_im(8);
                gen_op_andl_T0_T1();
            }
            break;
        case 2:
            gen_op_shll_T1_im(16);
            gen_op_andl_T0_T1();
            break;
        case 3:
            return 1;
        }
        gen_set_nzcv(cpu_T[0]);
        break;
    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:        /* WACC */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_addcb_M0();
            break;
        case 1:
            gen_op_iwmmxt_addcw_M0();
            break;
        case 2:
            gen_op_iwmmxt_addcl_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x115: case 0x515: case 0x915: case 0xd15:        /* TORC */
        if ((insn & 0x000ff00f) != 0x0003f000)
            return 1;
        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
        switch ((insn >> 22) & 3) {
        case 0:
            for (i = 0; i < 7; i ++) {
                gen_op_shll_T1_im(4);
                gen_op_orl_T0_T1();
            }
            break;
        case 1:
            for (i = 0; i < 3; i ++) {
                gen_op_shll_T1_im(8);
                gen_op_orl_T0_T1();
            }
            break;
        case 2:
            gen_op_shll_T1_im(16);
            gen_op_orl_T0_T1();
            break;
        case 3:
            return 1;
        }
        gen_set_nzcv(cpu_T[0]);
        break;
    case 0x103: case 0x503: case 0x903: case 0xd03:        /* TMOVMSK */
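        /* TMOVMSK collects the most significant bit of each element of
           wRn into a contiguous bit mask in the low bits of the ARM
           destination register.  */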
        rd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        if ((insn & 0xf) != 0)
            return 1;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            gen_op_iwmmxt_msbb_T0_M0();
            break;
        case 1:
            gen_op_iwmmxt_msbw_T0_M0();
            break;
        case 2:
            gen_op_iwmmxt_msbl_T0_M0();
            break;
        case 3:
            return 1;
        }
        gen_movl_reg_T0(s, rd);
        break;
    case 0x106: case 0x306: case 0x506: case 0x706:        /* WCMPGT */
    case 0x906: case 0xb06: case 0xd06: case 0xf06:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x00e: case 0x20e: case 0x40e: case 0x60e:        /* WUNPCKEL */
    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsb_M0();
            else
                gen_op_iwmmxt_unpacklub_M0();
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsw_M0();
            else
                gen_op_iwmmxt_unpackluw_M0();
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpacklsl_M0();
            else
                gen_op_iwmmxt_unpacklul_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x00c: case 0x20c: case 0x40c: case 0x60c:        /* WUNPCKEH */
    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsb_M0();
            else
                gen_op_iwmmxt_unpackhub_M0();
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsw_M0();
            else
                gen_op_iwmmxt_unpackhuw_M0();
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_unpackhsl_M0();
            else
                gen_op_iwmmxt_unpackhul_M0();
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x204: case 0x604: case 0xa04: case 0xe04:        /* WSRL */
    case 0x214: case 0x614: case 0xa14: case 0xe14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_srlw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_srll_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_srlq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x004: case 0x404: case 0x804: case 0xc04:        /* WSRA */
    case 0x014: case 0x414: case 0x814: case 0xc14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_sraw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_sral_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_sraq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x104: case 0x504: case 0x904: case 0xd04:        /* WSLL */
    case 0x114: case 0x514: case 0x914: case 0xd14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (gen_iwmmxt_shift(insn, 0xff))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            gen_op_iwmmxt_sllw_M0_T0();
            break;
        case 2:
            gen_op_iwmmxt_slll_M0_T0();
            break;
        case 3:
            gen_op_iwmmxt_sllq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x304: case 0x704: case 0xb04: case 0xf04:        /* WROR */
    case 0x314: case 0x714: case 0xb14: case 0xf14:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            if (gen_iwmmxt_shift(insn, 0xf))
                return 1;
            gen_op_iwmmxt_rorw_M0_T0();
            break;
        case 2:
            if (gen_iwmmxt_shift(insn, 0x1f))
                return 1;
            gen_op_iwmmxt_rorl_M0_T0();
            break;
        case 3:
            if (gen_iwmmxt_shift(insn, 0x3f))
                return 1;
            gen_op_iwmmxt_rorq_M0_T0();
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x116: case 0x316: case 0x516: case 0x716:        /* WMIN */
    case 0x916: case 0xb16: case 0xd16: case 0xf16:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_minsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_minul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x016: case 0x216: case 0x416: case 0x616:        /* WMAX */
    case 0x816: case 0xa16: case 0xc16: case 0xe16:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 22) & 3) {
        case 0:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxub_M0_wRn(rd1);
            break;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_maxul_M0_wRn(rd1);
            break;
        case 3:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x002: case 0x102: case 0x202: case 0x302:        /* WALIGNI */
    case 0x402: case 0x502: case 0x602: case 0x702:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_movl_T0_im((insn >> 20) & 3);
        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    case 0x01a: case 0x11a: case 0x21a: case 0x31a:        /* WSUB */
    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 20) & 0xf) {
        case 0x0:
            gen_op_iwmmxt_subnb_M0_wRn(rd1);
            break;
        case 0x1:
            gen_op_iwmmxt_subub_M0_wRn(rd1);
            break;
        case 0x3:
            gen_op_iwmmxt_subsb_M0_wRn(rd1);
            break;
        case 0x4:
            gen_op_iwmmxt_subnw_M0_wRn(rd1);
            break;
        case 0x5:
            gen_op_iwmmxt_subuw_M0_wRn(rd1);
            break;
        case 0x7:
            gen_op_iwmmxt_subsw_M0_wRn(rd1);
            break;
        case 0x8:
            gen_op_iwmmxt_subnl_M0_wRn(rd1);
            break;
        case 0x9:
            gen_op_iwmmxt_subul_M0_wRn(rd1);
            break;
        case 0xb:
            gen_op_iwmmxt_subsl_M0_wRn(rd1);
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x01e: case 0x11e: case 0x21e: case 0x31e:        /* WSHUFH */
    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
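        /* The 8-bit WSHUFH immediate is split across insn bits [19:16]
           and [3:0]; each 2-bit field selects which source halfword
           supplies the corresponding destination halfword.  */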
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
        gen_op_iwmmxt_shufh_M0_T0();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x018: case 0x118: case 0x218: case 0x318:        /* WADD */
    case 0x418: case 0x518: case 0x618: case 0x718:
    case 0x818: case 0x918: case 0xa18: case 0xb18:
    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        switch ((insn >> 20) & 0xf) {
        case 0x0:
            gen_op_iwmmxt_addnb_M0_wRn(rd1);
            break;
        case 0x1:
            gen_op_iwmmxt_addub_M0_wRn(rd1);
            break;
        case 0x3:
            gen_op_iwmmxt_addsb_M0_wRn(rd1);
            break;
        case 0x4:
            gen_op_iwmmxt_addnw_M0_wRn(rd1);
            break;
        case 0x5:
            gen_op_iwmmxt_adduw_M0_wRn(rd1);
            break;
        case 0x7:
            gen_op_iwmmxt_addsw_M0_wRn(rd1);
            break;
        case 0x8:
            gen_op_iwmmxt_addnl_M0_wRn(rd1);
            break;
        case 0x9:
            gen_op_iwmmxt_addul_M0_wRn(rd1);
            break;
        case 0xb:
            gen_op_iwmmxt_addsl_M0_wRn(rd1);
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x008: case 0x108: case 0x208: case 0x308:        /* WPACK */
    case 0x408: case 0x508: case 0x608: case 0x708:
    case 0x808: case 0x908: case 0xa08: case 0xb08:
    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 16) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        if (!(insn & (1 << 20)))
            return 1;
        switch ((insn >> 22) & 3) {
        case 0:
            return 1;
        case 1:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsw_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packuw_M0_wRn(rd1);
            break;
        case 2:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsl_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packul_M0_wRn(rd1);
            break;
        case 3:
            if (insn & (1 << 21))
                gen_op_iwmmxt_packsq_M0_wRn(rd1);
            else
                gen_op_iwmmxt_packuq_M0_wRn(rd1);
            break;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x201: case 0x203: case 0x205: case 0x207:
    case 0x209: case 0x20b: case 0x20d: case 0x20f:
    case 0x211: case 0x213: case 0x215: case 0x217:
    case 0x219: case 0x21b: case 0x21d: case 0x21f:
        wrd = (insn >> 5) & 0xf;
        rd0 = (insn >> 12) & 0xf;
        rd1 = (insn >> 0) & 0xf;
        if (rd0 == 0xf || rd1 == 0xf)
            return 1;
        gen_op_iwmmxt_movq_M0_wRn(wrd);
        switch ((insn >> 16) & 0xf) {
        case 0x0:                                        /* TMIA */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
            break;
        case 0x8:                                        /* TMIAPH */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
            break;
        case 0xc: case 0xd: case 0xe: case 0xf:                /* TMIAxy */
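            /* Bit 16 selects the top or bottom halfword of rd0 and
               bit 17 does the same for rd1; the two selected 16-bit
               values are multiplied and accumulated.  */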
            gen_movl_T1_reg(s, rd0);
            if (insn & (1 << 16))
                gen_op_shrl_T1_im(16);
            gen_op_movl_T0_T1();
            gen_movl_T1_reg(s, rd1);
            if (insn & (1 << 17))
                gen_op_shrl_T1_im(16);
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
            break;
        default:
            return 1;
        }
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        break;
    default:
        return 1;
    }

    return 0;
}

/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    int acc, rd0, rd1, rdhi, rdlo;

    if ((insn & 0x0ff00f10) == 0x0e200010) {
        /* Multiply with Internal Accumulate Format */
        rd0 = (insn >> 12) & 0xf;
        rd1 = insn & 0xf;
        acc = (insn >> 5) & 7;

        if (acc != 0)
            return 1;

        switch ((insn >> 16) & 0xf) {
        case 0x0:                                        /* MIA */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
            break;
        case 0x8:                                        /* MIAPH */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
            break;
        case 0xc:                                        /* MIABB */
        case 0xd:                                        /* MIABT */
        case 0xe:                                        /* MIATB */
        case 0xf:                                        /* MIATT */
            gen_movl_T1_reg(s, rd0);
            if (insn & (1 << 16))
                gen_op_shrl_T1_im(16);
            gen_op_movl_T0_T1();
            gen_movl_T1_reg(s, rd1);
            if (insn & (1 << 17))
                gen_op_shrl_T1_im(16);
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
            break;
        default:
            return 1;
        }

        gen_op_iwmmxt_movq_wRn_M0(acc);
        return 0;
    }

    if ((insn & 0x0fe00ff8) == 0x0c400000) {
        /* Internal Accumulator Access Format */
        rdhi = (insn >> 16) & 0xf;
        rdlo = (insn >> 12) & 0xf;
        acc = insn & 7;

        if (acc != 0)
            return 1;

        if (insn & ARM_CP_RW_BIT) {                        /* MRA */
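            /* The accumulator is 40 bits wide: MRA returns the low 32
               bits in RdLo and bits [39:32] in RdHi, hence the
               (1 << (40 - 32)) - 1 (i.e. 0xff) mask below.  */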
            gen_op_iwmmxt_movl_T0_T1_wRn(acc);
            gen_movl_reg_T0(s, rdlo);
            gen_op_movl_T0_im((1 << (40 - 32)) - 1);
            gen_op_andl_T0_T1();
            gen_movl_reg_T0(s, rdhi);
        } else {                                        /* MAR */
            gen_movl_T0_reg(s, rdlo);
            gen_movl_T1_reg(s, rdhi);
            gen_op_iwmmxt_movl_wRn_T0_T1(acc);
        }
        return 0;
    }

    return 1;
}

/* Disassemble system coprocessor instruction.  Return nonzero if
   instruction is not defined.  */
static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    uint32_t rd = (insn >> 12) & 0xf;
    uint32_t cp = (insn >> 8) & 0xf;
    if (IS_USER(s)) {
        return 1;
    }

    if (insn & ARM_CP_RW_BIT) {
        if (!env->cp[cp].cp_read)
            return 1;
        gen_op_movl_T0_im((uint32_t) s->pc);
        gen_set_pc_T0();
        gen_op_movl_T0_cp(insn);
        gen_movl_reg_T0(s, rd);
    } else {
        if (!env->cp[cp].cp_write)
            return 1;
        gen_op_movl_T0_im((uint32_t) s->pc);
        gen_set_pc_T0();
        gen_movl_T0_reg(s, rd);
        gen_op_movl_cp_T0(insn);
    }
    return 0;
}

static int cp15_user_ok(uint32_t insn)
{
    int cpn = (insn >> 16) & 0xf;
    int cpm = insn & 0xf;
    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);

    if (cpn == 13 && cpm == 0) {
        /* TLS register.  */
        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
            return 1;
    }
    if (cpn == 7) {
        /* ISB, DSB, DMB.  */
        if ((cpm == 5 && op == 4)
                || (cpm == 10 && (op == 4 || op == 5)))
            return 1;
    }
    return 0;
}

/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
   instruction is not defined.  */
static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    uint32_t rd;

    /* M profile cores use memory mapped registers instead of cp15.  */
    if (arm_feature(env, ARM_FEATURE_M))
        return 1;

    if ((insn & (1 << 25)) == 0) {
        if (insn & (1 << 20)) {
            /* mrrc */
            return 1;
        }
        /* mcrr.  Used for block cache operations, so implement as no-op.  */
        return 0;
    }
    if ((insn & (1 << 4)) == 0) {
        /* cdp */
        return 1;
    }
    if (IS_USER(s) && !cp15_user_ok(insn)) {
        return 1;
    }
    if ((insn & 0x0fff0fff) == 0x0e070f90
        || (insn & 0x0fff0fff) == 0x0e070f58) {
        /* Wait for interrupt.  */
        gen_op_movl_T0_im((long)s->pc);
        gen_set_pc_T0();
        s->is_jmp = DISAS_WFI;
        return 0;
    }
    rd = (insn >> 12) & 0xf;
    if (insn & ARM_CP_RW_BIT) {
        gen_op_movl_T0_cp15(insn);
        /* If the destination register is r15 then this sets the
           condition codes, so skip the register write.  */
        if (rd != 15)
            gen_movl_reg_T0(s, rd);
    } else {
        gen_movl_T0_reg(s, rd);
        gen_op_movl_cp15_T0(insn);
        /* Normally we would always end the TB here, but Linux
         * arch/arm/mach-pxa/sleep.S expects two instructions following
         * an MMU enable to execute from cache.  Imitate this behaviour.  */
        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
                (insn & 0x0fff0fff) != 0x0e010f10)
            gen_lookup_tb(s);
    }
    return 0;
}

#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if (insn & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)
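
/* VFP registers are encoded as a 4-bit field plus one extra bit.  For
   single precision the extra bit is the least significant bit of the
   register number (S0-S31); for VFP3 double precision it instead
   becomes the most significant bit (D0-D31), while earlier VFP
   versions require it to be zero, which is what VFP_DREG checks.  */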

#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)

/* Move between integer and VFP cores.  */
static TCGv gen_vfp_mrs(void)
{
    TCGv tmp = new_tmp();
    tcg_gen_mov_i32(tmp, cpu_F0s);
    return tmp;
}

static void gen_vfp_msr(TCGv tmp)
{
    tcg_gen_mov_i32(cpu_F0s, tmp);
    dead_tmp(tmp);
}

static inline int
vfp_enabled(CPUState * env)
{
    return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
}

/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
    int dp, veclen;
    TCGv tmp;

    if (!arm_feature(env, ARM_FEATURE_VFP))
        return 1;

    if (!vfp_enabled(env)) {
        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
        if ((insn & 0x0fe00fff) != 0x0ee00a10)
            return 1;
        rn = (insn >> 16) & 0xf;
        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
            return 1;
    }
    dp = ((insn & 0xf00) == 0xb00);
    switch ((insn >> 24) & 0xf) {
    case 0xe:
        if (insn & (1 << 4)) {
            /* single register transfer */
            rd = (insn >> 12) & 0xf;
            if (dp) {
                int size;
                int pass;

                VFP_DREG_N(rn, insn);
                if (insn & 0xf)
                    return 1;
                if (insn & 0x00c00060
                    && !arm_feature(env, ARM_FEATURE_NEON))
                    return 1;

                pass = (insn >> 21) & 1;
                if (insn & (1 << 22)) {
                    size = 0;
                    offset = ((insn >> 5) & 3) * 8;
                } else if (insn & (1 << 5)) {
                    size = 1;
                    offset = (insn & (1 << 6)) ? 16 : 0;
                } else {
                    size = 2;
                    offset = 0;
                }
                if (insn & ARM_CP_RW_BIT) {
                    /* vfp->arm */
                    switch (size) {
                    case 0:
                        NEON_GET_REG(T1, rn, pass);
                        if (offset)
                            gen_op_shrl_T1_im(offset);
                        if (insn & (1 << 23))
                            gen_uxtb(cpu_T[1]);
                        else
                            gen_sxtb(cpu_T[1]);
                        break;
                    case 1:
                        NEON_GET_REG(T1, rn, pass);
                        if (insn & (1 << 23)) {
                            if (offset) {
                                gen_op_shrl_T1_im(16);
                            } else {
                                gen_uxth(cpu_T[1]);
                            }
                        } else {
                            if (offset) {
                                gen_op_sarl_T1_im(16);
                            } else {
                                gen_sxth(cpu_T[1]);
                            }
                        }
                        break;
                    case 2:
                        NEON_GET_REG(T1, rn, pass);
                        break;
                    }
                    gen_movl_reg_T1(s, rd);
                } else {
                    /* arm->vfp */
                    gen_movl_T0_reg(s, rd);
                    if (insn & (1 << 23)) {
                        /* VDUP */
                        if (size == 0) {
                            gen_op_neon_dup_u8(0);
                        } else if (size == 1) {
                            gen_op_neon_dup_low16();
                        }
                        NEON_SET_REG(T0, rn, 0);
                        NEON_SET_REG(T0, rn, 1);
                    } else {
                        /* VMOV */
                        switch (size) {
                        case 0:
                            NEON_GET_REG(T2, rn, pass);
                            gen_op_movl_T1_im(0xff);
                            gen_op_andl_T0_T1();
                            gen_op_neon_insert_elt(offset, ~(0xff << offset));
                            NEON_SET_REG(T2, rn, pass);
                            break;
                        case 1:
                            NEON_GET_REG(T2, rn, pass);
                            gen_op_movl_T1_im(0xffff);
                            gen_op_andl_T0_T1();
                            bank_mask = offset ? 0xffff : 0xffff0000;
                            gen_op_neon_insert_elt(offset, bank_mask);
                            NEON_SET_REG(T2, rn, pass);
                            break;
                        case 2:
                            NEON_SET_REG(T0, rn, pass);
                            break;
                        }
                    }
                }
            } else { /* !dp */
                if ((insn & 0x6f) != 0x00)
                    return 1;
                rn = VFP_SREG_N(insn);
                if (insn & ARM_CP_RW_BIT) {
                    /* vfp->arm */
                    if (insn & (1 << 21)) {
                        /* system register */
                        rn >>= 1;

                        switch (rn) {
                        case ARM_VFP_FPSID:
                            /* VFP2 allows access to FSID from userspace.
                               VFP3 restricts all id registers to privileged
                               accesses.  */
                            if (IS_USER(s)
                                && arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPEXC:
                            if (IS_USER(s))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPINST:
                        case ARM_VFP_FPINST2:
                            /* Not present in VFP3.  */
                            if (IS_USER(s)
                                || arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        case ARM_VFP_FPSCR:
                            if (rd == 15) {
                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
                            } else {
                                tmp = new_tmp();
                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
                            }
                            break;
                        case ARM_VFP_MVFR0:
                        case ARM_VFP_MVFR1:
                            if (IS_USER(s)
                                || !arm_feature(env, ARM_FEATURE_VFP3))
                                return 1;
                            tmp = load_cpu_field(vfp.xregs[rn]);
                            break;
                        default:
                            return 1;
                        }
                    } else {
                        gen_mov_F0_vreg(0, rn);
                        tmp = gen_vfp_mrs();
                    }
                    if (rd == 15) {
                        /* Set the 4 flag bits in the CPSR.  */
                        gen_set_nzcv(tmp);
                        dead_tmp(tmp);
                    } else {
                        store_reg(s, rd, tmp);
                    }
                } else {
                    /* arm->vfp */
                    tmp = load_reg(s, rd);
                    if (insn & (1 << 21)) {
                        rn >>= 1;
                        /* system register */
                        switch (rn) {
                        case ARM_VFP_FPSID:
                        case ARM_VFP_MVFR0:
                        case ARM_VFP_MVFR1:
                            /* Writes are ignored.  */
                            break;
                        case ARM_VFP_FPSCR:
                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
                            dead_tmp(tmp);
                            gen_lookup_tb(s);
                            break;
                        case ARM_VFP_FPEXC:
                            if (IS_USER(s))
                                return 1;
                            store_cpu_field(tmp, vfp.xregs[rn]);
                            gen_lookup_tb(s);
                            break;
                        case ARM_VFP_FPINST:
                        case ARM_VFP_FPINST2:
                            store_cpu_field(tmp, vfp.xregs[rn]);
                            break;
                        default:
                            return 1;
                        }
                    } else {
                        gen_vfp_msr(tmp);
                        gen_mov_vreg_F0(0, rn);
                    }
                }
            }
        } else {
            /* data processing */
            /* The opcode is in bits 23, 21, 20 and 6.  */
            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
            if (dp) {
                if (op == 15) {
                    /* rn is opcode */
                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
                } else {
                    /* rn is register number */
                    VFP_DREG_N(rn, insn);
                }

                if (op == 15 && (rn == 15 || rn > 17)) {
                    /* Integer or single precision destination.  */
                    rd = VFP_SREG_D(insn);
                } else {
                    VFP_DREG_D(rd, insn);
                }

                if (op == 15 && (rn == 16 || rn == 17)) {
                    /* Integer source.  */
                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
                } else {
                    VFP_DREG_M(rm, insn);
                }
            } else {
                rn = VFP_SREG_N(insn);
                if (op == 15 && rn == 15) {
                    /* Double precision destination.  */
                    VFP_DREG_D(rd, insn);
                } else {
                    rd = VFP_SREG_D(insn);
                }
                rm = VFP_SREG_M(insn);
            }

            veclen = env->vfp.vec_len;
            if (op == 15 && rn > 3)
                veclen = 0;

            /* Shut up compiler warnings.  */
            delta_m = 0;
            delta_d = 0;
            bank_mask = 0;
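
            /* VFP short vectors: a nonzero FPSCR.LEN makes arithmetic
               ops iterate over several registers.  The register file
               is split into banks (of 8 singles or 4 doubles); the
               operation is scalar if the destination is in bank 0,
               and a source operand in bank 0 is treated as a scalar
               within a vector operation.  */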

            if (veclen > 0) {
                if (dp)
                    bank_mask = 0xc;
                else
                    bank_mask = 0x18;

                /* Figure out what type of vector operation this is.  */
                if ((rd & bank_mask) == 0) {
                    /* scalar */
                    veclen = 0;
                } else {
                    if (dp)
                        delta_d = (env->vfp.vec_stride >> 1) + 1;
                    else
                        delta_d = env->vfp.vec_stride + 1;

                    if ((rm & bank_mask) == 0) {
                        /* mixed scalar/vector */
                        delta_m = 0;
                    } else {
                        /* vector */
                        delta_m = delta_d;
                    }
                }
            }

            /* Load the initial operands.  */
            if (op == 15) {
                switch (rn) {
                case 16:
                case 17:
                    /* Integer source */
                    gen_mov_F0_vreg(0, rm);
                    break;
                case 8:
                case 9:
                    /* Compare */
                    gen_mov_F0_vreg(dp, rd);
                    gen_mov_F1_vreg(dp, rm);
                    break;
                case 10:
                case 11:
                    /* Compare with zero */
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_F1_ld0(dp);
                    break;
                case 20:
                case 21:
                case 22:
                case 23:
                    /* Source and destination the same.  */
                    gen_mov_F0_vreg(dp, rd);
                    break;
                default:
                    /* One source operand.  */
                    gen_mov_F0_vreg(dp, rm);
                    break;
                }
            } else {
                /* Two source operands.  */
                gen_mov_F0_vreg(dp, rn);
                gen_mov_F1_vreg(dp, rm);
            }

            for (;;) {
                /* Perform the calculation.  */
                switch (op) {
                case 0: /* mac: fd + (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 1: /* nmac: fd - (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_neg(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    break;
                case 2: /* msc: -fd + (fn * fm) */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_sub(dp);
                    break;
                case 3: /* nmsc: -fd - (fn * fm)  */
                    gen_vfp_mul(dp);
                    gen_mov_F1_vreg(dp, rd);
                    gen_vfp_add(dp);
                    gen_vfp_neg(dp);
                    break;
                case 4: /* mul: fn * fm */
                    gen_vfp_mul(dp);
                    break;
                case 5: /* nmul: -(fn * fm) */
                    gen_vfp_mul(dp);
                    gen_vfp_neg(dp);
                    break;
                case 6: /* add: fn + fm */
                    gen_vfp_add(dp);
                    break;
                case 7: /* sub: fn - fm */
                    gen_vfp_sub(dp);
                    break;
                case 8: /* div: fn / fm */
                    gen_vfp_div(dp);
                    break;
                case 14: /* fconst */
                    if (!arm_feature(env, ARM_FEATURE_VFP3))
                      return 1;

                    n = (insn << 12) & 0x80000000;
                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
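                    /* Expand the 8-bit VFP3 immediate (sign in n, the
                       remaining seven bits in i) into a full single or
                       double precision constant: the top bit of i
                       selects the exponent pattern and the low four
                       bits form the fraction.  */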
                    if (dp) {
                        if (i & 0x40)
                            i |= 0x3f80;
                        else
                            i |= 0x4000;
                        n |= i << 16;
                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
                    } else {
                        if (i & 0x40)
                            i |= 0x780;
                        else
                            i |= 0x800;
                        n |= i << 19;
                        tcg_gen_movi_i32(cpu_F0s, n);
                    }
                    break;
                case 15: /* extension space */
                    switch (rn) {
                    case 0: /* cpy */
                        /* no-op */
                        break;
                    case 1: /* abs */
                        gen_vfp_abs(dp);
                        break;
                    case 2: /* neg */
                        gen_vfp_neg(dp);
                        break;
                    case 3: /* sqrt */
                        gen_vfp_sqrt(dp);
                        break;
                    case 8: /* cmp */
                        gen_vfp_cmp(dp);
                        break;
                    case 9: /* cmpe */
                        gen_vfp_cmpe(dp);
                        break;
                    case 10: /* cmpz */
                        gen_vfp_cmp(dp);
                        break;
                    case 11: /* cmpez */
                        gen_vfp_F1_ld0(dp);
                        gen_vfp_cmpe(dp);
                        break;
                    case 15: /* single<->double conversion */
                        if (dp)
                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
                        else
                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
                        break;
                    case 16: /* fuito */
                        gen_vfp_uito(dp);
                        break;
                    case 17: /* fsito */
                        gen_vfp_sito(dp);
                        break;
                    case 20: /* fshto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_shto(dp, rm);
                        break;
                    case 21: /* fslto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_slto(dp, rm);
                        break;
                    case 22: /* fuhto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_uhto(dp, rm);
                        break;
                    case 23: /* fulto */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_ulto(dp, rm);
                        break;
                    case 24: /* ftoui */
                        gen_vfp_toui(dp);
                        break;
                    case 25: /* ftouiz */
                        gen_vfp_touiz(dp);
                        break;
                    case 26: /* ftosi */
                        gen_vfp_tosi(dp);
                        break;
                    case 27: /* ftosiz */
                        gen_vfp_tosiz(dp);
                        break;
                    case 28: /* ftosh */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_tosh(dp, rm);
                        break;
                    case 29: /* ftosl */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_tosl(dp, rm);
                        break;
                    case 30: /* ftouh */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_touh(dp, rm);
                        break;
                    case 31: /* ftoul */
                        if (!arm_feature(env, ARM_FEATURE_VFP3))
                          return 1;
                        gen_vfp_toul(dp, rm);
                        break;
                    default: /* undefined */
                        printf ("rn:%d\n", rn);
                        return 1;
                    }
                    break;
                default: /* undefined */
                    printf ("op:%d\n", op);
                    return 1;
                }

                /* Write back the result.  */
                if (op == 15 && (rn >= 8 && rn <= 11))
                    ; /* Comparison, do nothing.  */
                else if (op == 15 && rn > 17)
                    /* Integer result.  */
                    gen_mov_vreg_F0(0, rd);
                else if (op == 15 && rn == 15)
                    /* conversion */
                    gen_mov_vreg_F0(!dp, rd);
                else
                    gen_mov_vreg_F0(dp, rd);

                /* break out of the loop if we have finished  */
                if (veclen == 0)
                    break;

                if (op == 15 && delta_m == 0) {
                    /* single source one-many */
                    while (veclen--) {
                        rd = ((rd + delta_d) & (bank_mask - 1))
                             | (rd & bank_mask);
                        gen_mov_vreg_F0(dp, rd);
                    }
                    break;
                }
                /* Setup the next operands.  */
                veclen--;
                rd = ((rd + delta_d) & (bank_mask - 1))
                     | (rd & bank_mask);

                if (op == 15) {
                    /* One source operand.  */
                    rm = ((rm + delta_m) & (bank_mask - 1))
                         | (rm & bank_mask);
                    gen_mov_F0_vreg(dp, rm);
                } else {
                    /* Two source operands.  */
                    rn = ((rn + delta_d) & (bank_mask - 1))
                         | (rn & bank_mask);
                    gen_mov_F0_vreg(dp, rn);
                    if (delta_m) {
                        rm = ((rm + delta_m) & (bank_mask - 1))
                             | (rm & bank_mask);
                        gen_mov_F1_vreg(dp, rm);
                    }
                }
            }
        }
        break;
    case 0xc:
    case 0xd:
        if (dp && (insn & 0x03e00000) == 0x00400000) {
            /* two-register transfer */
            rn = (insn >> 16) & 0xf;
            rd = (insn >> 12) & 0xf;
            if (dp) {
                VFP_DREG_M(rm, insn);
            } else {
                rm = VFP_SREG_M(insn);
            }

            if (insn & ARM_CP_RW_BIT) {
                /* vfp->arm */
                if (dp) {
                    gen_mov_F0_vreg(0, rm * 2);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rd, tmp);
                    gen_mov_F0_vreg(0, rm * 2 + 1);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rn, tmp);
                } else {
                    gen_mov_F0_vreg(0, rm);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rn, tmp);
                    gen_mov_F0_vreg(0, rm + 1);
                    tmp = gen_vfp_mrs();
                    store_reg(s, rd, tmp);
                }
            } else {
                /* arm->vfp */
                if (dp) {
                    tmp = load_reg(s, rd);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm * 2);
                    tmp = load_reg(s, rn);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm * 2 + 1);
                } else {
                    tmp = load_reg(s, rn);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm);
                    tmp = load_reg(s, rd);
                    gen_vfp_msr(tmp);
                    gen_mov_vreg_F0(0, rm + 1);
                }
            }
        } else {
            /* Load/store */
            rn = (insn >> 16) & 0xf;
            if (dp)
                VFP_DREG_D(rd, insn);
            else
                rd = VFP_SREG_D(insn);
            if (s->thumb && rn == 15) {
                gen_op_movl_T1_im(s->pc & ~2);
            } else {
                gen_movl_T1_reg(s, rn);
            }
            if ((insn & 0x01200000) == 0x01000000) {
                /* Single load/store */
                offset = (insn & 0xff) << 2;
                if ((insn & (1 << 23)) == 0)
                    offset = -offset;
                gen_op_addl_T1_im(offset);
                if (insn & (1 << 20)) {
                    gen_vfp_ld(s, dp);
                    gen_mov_vreg_F0(dp, rd);
                } else {
                    gen_mov_F0_vreg(dp, rd);
                    gen_vfp_st(s, dp);
                }
            } else {
                /* load/store multiple */
                if (dp)
                    n = (insn >> 1) & 0x7f;
                else
                    n = insn & 0xff;

                if (insn & (1 << 24)) /* pre-decrement */
                    gen_op_addl_T1_im(-((insn & 0xff) << 2));

                if (dp)
                    offset = 8;
                else
                    offset = 4;
                for (i = 0; i < n; i++) {
                    if (insn & ARM_CP_RW_BIT) {
                        /* load */
                        gen_vfp_ld(s, dp);
                        gen_mov_vreg_F0(dp, rd + i);
                    } else {
                        /* store */
                        gen_mov_F0_vreg(dp, rd + i);
                        gen_vfp_st(s, dp);
                    }
                    gen_op_addl_T1_im(offset);
                }
                if (insn & (1 << 21)) {
                    /* writeback */
                    if (insn & (1 << 24))
                        offset = -offset * n;
                    else if (dp && (insn & 1))
                        offset = 4;
                    else
                        offset = 0;

                    if (offset != 0)
                        gen_op_addl_T1_im(offset);
                    gen_movl_reg_T1(s, rn);
                }
            }
        }
        break;
    default:
        /* Should never happen.  */
        return 1;
    }
    return 0;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
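    /* Direct block chaining is only safe when the jump stays within
       the current guest page: the chained link bypasses the usual
       lookup, so a cross-page jump must exit to the main loop and
       resolve the target again.  */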
    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
        tcg_gen_goto_tb(n);
        gen_op_movl_T0_im(dest);
        gen_set_pc_T0();
        tcg_gen_exit_tb((long)tb + n);
    } else {
        gen_op_movl_T0_im(dest);
        gen_set_pc_T0();
        tcg_gen_exit_tb(0);
    }
}

static inline void gen_jmp (DisasContext *s, uint32_t dest)
{
    if (__builtin_expect(s->singlestep_enabled, 0)) {
        /* An indirect jump so that we still trigger the debug exception.  */
        if (s->thumb)
            dest |= 1;
        gen_bx_im(s, dest);
    } else {
        gen_goto_tb(s, 0, dest);
        s->is_jmp = DISAS_TB_JUMP;
    }
}

static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
{
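    /* Signed 16x16->32 multiply, as used by the SMULxy family: x and y
       select the top (arithmetic shift down) or bottom (sign-extended)
       halfword of each operand.  */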
    if (x)
        tcg_gen_sari_i32(t0, t0, 16);
    else
        gen_sxth(t0);
    if (y)
        tcg_gen_sari_i32(t1, t1, 16);
    else
        gen_sxth(t1);
    tcg_gen_mul_i32(t0, t0, t1);
}

/* Return the mask of PSR bits set by a MSR instruction.  */
static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
    uint32_t mask;

    mask = 0;
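    /* Each of the four flag bits enables one byte lane of the PSR,
       corresponding to the c, x, s and f fields of the MSR field
       mask.  */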
3085
    if (flags & (1 << 0))
3086
        mask |= 0xff;
3087
    if (flags & (1 << 1))
3088
        mask |= 0xff00;
3089
    if (flags & (1 << 2))
3090
        mask |= 0xff0000;
3091
    if (flags & (1 << 3))
3092
        mask |= 0xff000000;
3093

    
3094
    /* Mask out undefined bits.  */
3095
    mask &= ~CPSR_RESERVED;
3096
    if (!arm_feature(env, ARM_FEATURE_V6))
3097
        mask &= ~(CPSR_E | CPSR_GE);
3098
    if (!arm_feature(env, ARM_FEATURE_THUMB2))
3099
        mask &= ~CPSR_IT;
3100
    /* Mask out execution state bits.  */
3101
    if (!spsr)
3102
        mask &= ~CPSR_EXEC;
3103
    /* Mask out privileged bits.  */
3104
    if (IS_USER(s))
3105
        mask &= CPSR_USER;
3106
    return mask;
3107
}
3108

    
3109
/* Returns nonzero if access to the PSR is not permitted.  */
3110
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
3111
{
3112
    TCGv tmp;
3113
    if (spsr) {
3114
        /* ??? This is also undefined in system mode.  */
3115
        if (IS_USER(s))
3116
            return 1;
3117

    
3118
        tmp = load_cpu_field(spsr);
3119
        tcg_gen_andi_i32(tmp, tmp, ~mask);
3120
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
3121
        tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
3122
        store_cpu_field(tmp, spsr);
3123
    } else {
3124
        gen_set_cpsr(cpu_T[0], mask);
3125
    }
3126
    gen_lookup_tb(s);
3127
    return 0;
3128
}
3129

    
3130
/* Generate an old-style exception return.  */
3131
static void gen_exception_return(DisasContext *s)
3132
{
3133
    TCGv tmp;
3134
    gen_set_pc_T0();
3135
    tmp = load_cpu_field(spsr);
3136
    gen_set_cpsr(tmp, 0xffffffff);
3137
    dead_tmp(tmp);
3138
    s->is_jmp = DISAS_UPDATE;
3139
}
3140

    
3141
/* Generate a v6 exception return.  Marks both values as dead.  */
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
{
    gen_set_cpsr(cpsr, 0xffffffff);
    dead_tmp(cpsr);
    store_reg(s, 15, pc);
    s->is_jmp = DISAS_UPDATE;
}

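/* Write the current Thumb-2 conditional execution state back to
   env->condexec_bits so that anything that interrupts translation here
   (e.g. an exception) sees up-to-date IT bits.  */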
static inline void
gen_set_condexec (DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv tmp = new_tmp();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void gen_nop_hint(DisasContext *s, int val)
{
    switch (val) {
    case 3: /* wfi */
        gen_op_movl_T0_im((long)s->pc);
        gen_set_pc_T0();
        s->is_jmp = DISAS_WFI;
        break;
    case 2: /* wfe */
    case 4: /* sev */
        /* TODO: Implement SEV and WFE.  May help SMP performance.  */
    default: /* nop */
        break;
    }
}

/* Neon shift by constant.  The actual ops are the same as used for variable
   shifts.  [OP][U][SIZE]  */
static GenOpFunc *gen_neon_shift_im[8][2][4] = {
    { /* 0 */ /* VSHR */
      {
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s8,
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, { /* 1 */ /* VSRA */
      {
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s8,
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, { /* 2 */ /* VRSHR */
      {
        gen_op_neon_rshl_u8,
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s8,
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }, { /* 3 */ /* VRSRA */
      {
        gen_op_neon_rshl_u8,
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s8,
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }, { /* 4 */
      {
        NULL, NULL, NULL, NULL
      }, { /* VSRI */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }
    }, { /* 5 */
      { /* VSHL */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }, { /* VSLI */
        gen_op_neon_shl_u8,
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64,
      }
    }, { /* 6 */ /* VQSHL */
      {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }, {
        gen_op_neon_qshl_s8,
        gen_op_neon_qshl_s16,
        gen_op_neon_qshl_s32,
        gen_op_neon_qshl_s64
      }
    }, { /* 7 */ /* VQSHLU */
      {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }, {
        gen_op_neon_qshl_u8,
        gen_op_neon_qshl_u16,
        gen_op_neon_qshl_u32,
        gen_op_neon_qshl_u64
      }
    }
};

/* Shift-and-narrow ops, indexed [R][U][size - 1]: R selects the
   rounding variants, U the signedness.  */
static GenOpFunc *gen_neon_shift_im_narrow[2][2][3] = {
    {
      {
        gen_op_neon_shl_u16,
        gen_op_neon_shl_u32,
        gen_op_neon_shl_u64
      }, {
        gen_op_neon_shl_s16,
        gen_op_neon_shl_s32,
        gen_op_neon_shl_s64
      }
    }, {
      {
        gen_op_neon_rshl_u16,
        gen_op_neon_rshl_u32,
        gen_op_neon_rshl_u64
      }, {
        gen_op_neon_rshl_s16,
        gen_op_neon_rshl_s32,
        gen_op_neon_rshl_s64
      }
    }
};

static inline void
gen_op_neon_narrow_u32 (void)
{
    /* No-op.  */
}

static GenOpFunc *gen_neon_narrow[3] = {
    gen_op_neon_narrow_u8,
    gen_op_neon_narrow_u16,
    gen_op_neon_narrow_u32
};

static GenOpFunc *gen_neon_narrow_satu[3] = {
    gen_op_neon_narrow_sat_u8,
    gen_op_neon_narrow_sat_u16,
    gen_op_neon_narrow_sat_u32
};

static GenOpFunc *gen_neon_narrow_sats[3] = {
    gen_op_neon_narrow_sat_s8,
    gen_op_neon_narrow_sat_s16,
    gen_op_neon_narrow_sat_s32
};

static inline int gen_neon_add(int size)
{
    switch (size) {
    case 0: gen_op_neon_add_u8(); break;
    case 1: gen_op_neon_add_u16(); break;
    case 2: gen_op_addl_T0_T1(); break;
    default: return 1;
    }
    return 0;
}

/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_op_neon_pmax_s32  gen_op_neon_max_s32
#define gen_op_neon_pmax_u32  gen_op_neon_max_u32
#define gen_op_neon_pmin_s32  gen_op_neon_min_s32
#define gen_op_neon_pmin_u32  gen_op_neon_min_u32

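/* Pick the integer op matching the current element size and signedness:
   the switch index is (size << 1) | u, so e.g. size == 1 with u == 0
   expands to gen_op_neon_<name>_s16().  */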
#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: gen_op_neon_##name##_s8(); break; \
    case 1: gen_op_neon_##name##_u8(); break; \
    case 2: gen_op_neon_##name##_s16(); break; \
    case 3: gen_op_neon_##name##_u16(); break; \
    case 4: gen_op_neon_##name##_s32(); break; \
    case 5: gen_op_neon_##name##_u32(); break; \
    default: return 1; \
    }} while (0)

static inline void
gen_neon_movl_scratch_T0(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_setreg_T0(offset);
}

static inline void
gen_neon_movl_scratch_T1(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_setreg_T1(offset);
}

static inline void
gen_neon_movl_T0_scratch(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_getreg_T0(offset);
}

static inline void
gen_neon_movl_T1_scratch(int scratch)
{
  uint32_t offset;

  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
  gen_op_neon_getreg_T1(offset);
}

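/* Widen an unsigned 32-bit value in T0 to 64 bits in {T0, T1} by
   zeroing the high half.  */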
static inline void gen_op_neon_widen_u32(void)
{
    gen_op_movl_T1_im(0);
}

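/* Fetch a scalar operand into T0 for a "two registers and a scalar"
   op; the halfword variants duplicate the selected 16-bit lane into
   both halves of T0.  */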
static inline void gen_neon_get_scalar(int size, int reg)
{
    if (size == 1) {
        NEON_GET_REG(T0, reg >> 1, reg & 1);
    } else {
        NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
        if (reg & 1)
            gen_op_neon_dup_low16();
        else
            gen_op_neon_dup_high16();
    }
}

static void gen_neon_unzip(int reg, int q, int tmp, int size)
{
    int n;

    for (n = 0; n < q + 1; n += 2) {
        NEON_GET_REG(T0, reg, n);
        NEON_GET_REG(T1, reg, n + 1);
        switch (size) {
        case 0: gen_op_neon_unzip_u8(); break;
        case 1: gen_op_neon_zip_u16(); break; /* zip and unzip are the same.  */
        case 2: /* no-op */; break;
        default: abort();
        }
        gen_neon_movl_scratch_T0(tmp + n);
        gen_neon_movl_scratch_T1(tmp + n + 1);
    }
}

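/* Layout of a VLD/VST element or structure access for each value of
   the 'op' field: the number of D registers accessed, the interleave
   pattern, and the spacing between the registers.  */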
static struct {
    int nregs;
    int interleave;
    int spacing;
} neon_ls_element_type[11] = {
    {4, 4, 1},
    {4, 4, 2},
    {4, 1, 1},
    {4, 2, 1},
    {3, 3, 1},
    {3, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {2, 2, 1},
    {2, 2, 2},
    {2, 1, 1}
};

/* Translate a NEON load/store element instruction.  Return nonzero if the
   instruction is invalid.  */
static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    int rd, rn, rm;
    int op;
    int nregs;
    int interleave;
    int stride;
    int size;
    int reg;
    int pass;
    int load;
    int shift;
    uint32_t mask;
    int n;
    TCGv tmp;

    if (!vfp_enabled(env))
      return 1;
    VFP_DREG_D(rd, insn);
    rn = (insn >> 16) & 0xf;
    rm = insn & 0xf;
    load = (insn & (1 << 21)) != 0;
    if ((insn & (1 << 23)) == 0) {
        /* Load store all elements.  */
        op = (insn >> 8) & 0xf;
        size = (insn >> 6) & 3;
        if (op > 10 || size == 3)
            return 1;
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        gen_movl_T1_reg(s, rn);
        stride = (1 << size) * interleave;
        for (reg = 0; reg < nregs; reg++) {
            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im((1 << size) * reg);
            } else if (interleave == 2 && nregs == 4 && reg == 2) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im(1 << size);
            }
            for (pass = 0; pass < 2; pass++) {
                if (size == 2) {
                    if (load) {
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        tcg_gen_mov_i32(cpu_T[0], tmp);
                        dead_tmp(tmp);
                        NEON_SET_REG(T0, rd, pass);
                    } else {
                        NEON_GET_REG(T0, rd, pass);
                        tmp = new_tmp();
                        tcg_gen_mov_i32(tmp, cpu_T[0]);
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                    }
                    gen_op_addl_T1_im(stride);
                } else if (size == 1) {
                    if (load) {
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        tcg_gen_mov_i32(cpu_T[0], tmp);
                        dead_tmp(tmp);
                        gen_op_addl_T1_im(stride);
                        gen_op_movl_T2_T0();
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        tcg_gen_mov_i32(cpu_T[0], tmp);
                        dead_tmp(tmp);
                        gen_op_addl_T1_im(stride);
                        gen_op_neon_insert_elt(16, 0xffff);
                        NEON_SET_REG(T2, rd, pass);
                    } else {
                        NEON_GET_REG(T2, rd, pass);
                        gen_op_movl_T0_T2();
                        tmp = new_tmp();
                        tcg_gen_mov_i32(tmp, cpu_T[0]);
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        gen_op_neon_extract_elt(16, 0xffff0000);
                        tmp = new_tmp();
                        tcg_gen_mov_i32(tmp, cpu_T[0]);
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                    }
                } else /* size == 0 */ {
                    if (load) {
                        mask = 0xff;
                        for (n = 0; n < 4; n++) {
                            tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                            tcg_gen_mov_i32(cpu_T[0], tmp);
                            dead_tmp(tmp);
                            gen_op_addl_T1_im(stride);
                            if (n == 0) {
                                gen_op_movl_T2_T0();
                            } else {
                                gen_op_neon_insert_elt(n * 8, ~mask);
                            }
                            mask <<= 8;
                        }
                        NEON_SET_REG(T2, rd, pass);
                    } else {
                        NEON_GET_REG(T2, rd, pass);
                        mask = 0xff;
                        for (n = 0; n < 4; n++) {
                            if (n == 0) {
                                gen_op_movl_T0_T2();
                            } else {
                                gen_op_neon_extract_elt(n * 8, mask);
                            }
                            tmp = new_tmp();
                            tcg_gen_mov_i32(tmp, cpu_T[0]);
                            gen_st8(tmp, cpu_T[1], IS_USER(s));
                            gen_op_addl_T1_im(stride);
                            mask <<= 8;
                        }
                    }
                }
            }
            rd += neon_ls_element_type[op].spacing;
        }
        stride = nregs * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {
            /* Load single element to all lanes.  */
            if (!load)
                return 1;
            size = (insn >> 6) & 3;
            nregs = ((insn >> 8) & 3) + 1;
            stride = (insn & (1 << 5)) ? 2 : 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                switch (size) {
                case 0:
                    tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    gen_op_neon_dup_u8(0);
                    break;
                case 1:
                    tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    gen_op_neon_dup_low16();
                    break;
                case 2:
                    tmp = gen_ld32(cpu_T[1], IS_USER(s));
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    break;
                case 3:
                    return 1;
                }
                gen_op_addl_T1_im(1 << size);
                NEON_SET_REG(T0, rd, 0);
                NEON_SET_REG(T0, rd, 1);
                rd += stride;
            }
            stride = (1 << size) * nregs;
        } else {
            /* Single element.  */
            pass = (insn >> 7) & 1;
            switch (size) {
            case 0:
                shift = ((insn >> 5) & 3) * 8;
                mask = 0xff << shift;
                stride = 1;
                break;
            case 1:
                shift = ((insn >> 6) & 1) * 16;
                mask = shift ? 0xffff0000 : 0xffff;
                stride = (insn & (1 << 5)) ? 2 : 1;
                break;
            case 2:
                shift = 0;
                mask = 0xffffffff;
                stride = (insn & (1 << 6)) ? 2 : 1;
                break;
            default:
                abort();
            }
            nregs = ((insn >> 8) & 3) + 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                if (load) {
                    if (size != 2) {
                        NEON_GET_REG(T2, rd, pass);
                    }
                    switch (size) {
                    case 0:
                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        break;
                    }
                    tcg_gen_mov_i32(cpu_T[0], tmp);
                    dead_tmp(tmp);
                    if (size != 2) {
                        gen_op_neon_insert_elt(shift, ~mask);
                        NEON_SET_REG(T2, rd, pass);
                    } else {
                        NEON_SET_REG(T0, rd, pass);
                    }
                } else { /* Store */
                    if (size == 2) {
                        NEON_GET_REG(T0, rd, pass);
                    } else {
                        NEON_GET_REG(T2, rd, pass);
                        gen_op_neon_extract_elt(shift, mask);
                    }
                    tmp = new_tmp();
                    tcg_gen_mov_i32(tmp, cpu_T[0]);
                    switch (size) {
                    case 0:
                        gen_st8(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                        break;
                    }
                }
                rd += stride;
                gen_op_addl_T1_im(1 << size);
            }
            stride = nregs * (1 << size);
        }
    }
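    /* Post-index writeback: rm == 15 means no writeback, rm == 13
       selects writeback by the transfer size computed above, and any
       other rm adds that index register to the base.  */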
    if (rm != 15) {
        TCGv base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            dead_tmp(index);
        }
        store_reg(s, rn, base);
    }
    return 0;
}

/* Translate a NEON data processing instruction.  Return nonzero if the
   instruction is invalid.
   In general we process vectors in 32-bit chunks.  This means we can reuse
   some of the scalar ops, and hopefully the code generated for 32-bit
   hosts won't be too awful.  The downside is that the few 64-bit operations
   (mainly shifts) get complicated.  */

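/* A quad (Q) register is processed as four 32-bit passes and a double
   (D) register as two; the 64-bit element cases work on {T0, T1} pairs.  */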
static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    int op;
    int q;
    int rd, rn, rm;
    int size;
    int shift;
    int pass;
    int count;
    int pairwise;
    int u;
    int n;
    uint32_t imm;

    if (!vfp_enabled(env))
      return 1;
    q = (insn & (1 << 6)) != 0;
    u = (insn >> 24) & 1;
    VFP_DREG_D(rd, insn);
    VFP_DREG_N(rn, insn);
    VFP_DREG_M(rm, insn);
    size = (insn >> 20) & 3;
    if ((insn & (1 << 23)) == 0) {
        /* Three register same length.  */
        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
        if (size == 3 && (op == 1 || op == 5 || op == 16)) {
            for (pass = 0; pass < (q ? 2 : 1); pass++) {
                NEON_GET_REG(T0, rm, pass * 2);
                NEON_GET_REG(T1, rm, pass * 2 + 1);
                gen_neon_movl_scratch_T0(0);
                gen_neon_movl_scratch_T1(1);
                NEON_GET_REG(T0, rn, pass * 2);
                NEON_GET_REG(T1, rn, pass * 2 + 1);
                switch (op) {
                case 1: /* VQADD */
                    if (u) {
                        gen_op_neon_addl_saturate_u64();
                    } else {
                        gen_op_neon_addl_saturate_s64();
                    }
                    break;
                case 5: /* VQSUB */
                    if (u) {
                        gen_op_neon_subl_saturate_u64();
                    } else {
                        gen_op_neon_subl_saturate_s64();
                    }
                    break;
                case 16:
                    if (u) {
                        gen_op_neon_subl_u64();
                    } else {
                        gen_op_neon_addl_u64();
                    }
                    break;
                default:
                    abort();
                }
                NEON_SET_REG(T0, rd, pass * 2);
                NEON_SET_REG(T1, rd, pass * 2 + 1);
            }
            return 0;
        }
        switch (op) {
        case 8: /* VSHL */
        case 9: /* VQSHL */
        case 10: /* VRSHL */
        case 11: /* VQRSHL */
            /* Shift operations have Rn and Rm reversed.  */
            {
                int tmp;
                tmp = rn;
                rn = rm;
                rm = tmp;
                pairwise = 0;
            }
            break;
        case 20: /* VPMAX */
        case 21: /* VPMIN */
        case 23: /* VPADD */
            pairwise = 1;
            break;
        case 26: /* VPADD (float) */
            pairwise = (u && size < 2);
            break;
        case 30: /* VPMIN/VPMAX (float) */
            pairwise = u;
            break;
        default:
            pairwise = 0;
            break;
        }
        for (pass = 0; pass < (q ? 4 : 2); pass++) {

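        /* For pairwise ops both 32-bit inputs of a pass come from the
           same source register (rn for the low passes, rm for the high
           ones); elementwise ops take one input from each source.  */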
        if (pairwise) {
            /* Pairwise.  */
            if (q)
                n = (pass & 1) * 2;
            else
                n = 0;
            if (pass < q + 1) {
                NEON_GET_REG(T0, rn, n);
                NEON_GET_REG(T1, rn, n + 1);
            } else {
                NEON_GET_REG(T0, rm, n);
                NEON_GET_REG(T1, rm, n + 1);
            }
        } else {
            /* Elementwise.  */
            NEON_GET_REG(T0, rn, pass);
            NEON_GET_REG(T1, rm, pass);
        }
        switch (op) {
        case 0: /* VHADD */
            GEN_NEON_INTEGER_OP(hadd);
            break;
        case 1: /* VQADD */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qadd_s8(); break;
            case 1: gen_op_neon_qadd_u8(); break;
            case 2: gen_op_neon_qadd_s16(); break;
            case 3: gen_op_neon_qadd_u16(); break;
            case 4: gen_op_addl_T0_T1_saturate(); break;
            case 5: gen_op_addl_T0_T1_usaturate(); break;
            default: abort();
            }
            break;
        case 2: /* VRHADD */
            GEN_NEON_INTEGER_OP(rhadd);
            break;
        case 3: /* Logic ops.  */
            switch ((u << 2) | size) {
            case 0: /* VAND */
                gen_op_andl_T0_T1();
                break;
            case 1: /* BIC */
                gen_op_bicl_T0_T1();
                break;
            case 2: /* VORR */
                gen_op_orl_T0_T1();
                break;
            case 3: /* VORN */
                gen_op_notl_T1();
                gen_op_orl_T0_T1();
                break;
            case 4: /* VEOR */
                gen_op_xorl_T0_T1();
                break;
            case 5: /* VBSL */
                NEON_GET_REG(T2, rd, pass);
                gen_op_neon_bsl();
                break;
            case 6: /* VBIT */
                NEON_GET_REG(T2, rd, pass);
                gen_op_neon_bit();
                break;
            case 7: /* VBIF */
                NEON_GET_REG(T2, rd, pass);
                gen_op_neon_bif();
                break;
            }
            break;
        case 4: /* VHSUB */
            GEN_NEON_INTEGER_OP(hsub);
            break;
        case 5: /* VQSUB */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qsub_s8(); break;
            case 1: gen_op_neon_qsub_u8(); break;
            case 2: gen_op_neon_qsub_s16(); break;
            case 3: gen_op_neon_qsub_u16(); break;
            case 4: gen_op_subl_T0_T1_saturate(); break;
            case 5: gen_op_subl_T0_T1_usaturate(); break;
            default: abort();
            }
            break;
        case 6: /* VCGT */
            GEN_NEON_INTEGER_OP(cgt);
            break;
        case 7: /* VCGE */
            GEN_NEON_INTEGER_OP(cge);
            break;
        case 8: /* VSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_shl_s8(); break;
            case 1: gen_op_neon_shl_u8(); break;
            case 2: gen_op_neon_shl_s16(); break;
            case 3: gen_op_neon_shl_u16(); break;
            case 4: gen_op_neon_shl_s32(); break;
            case 5: gen_op_neon_shl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_shl_s64(); break;
            case 7: gen_op_neon_shl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VSHL.64 not implemented");
#endif
            }
            break;
        case 9: /* VQSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qshl_s8(); break;
            case 1: gen_op_neon_qshl_u8(); break;
            case 2: gen_op_neon_qshl_s16(); break;
            case 3: gen_op_neon_qshl_u16(); break;
            case 4: gen_op_neon_qshl_s32(); break;
            case 5: gen_op_neon_qshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_qshl_s64(); break;
            case 7: gen_op_neon_qshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VQSHL.64 not implemented");
#endif
            }
            break;
        case 10: /* VRSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_rshl_s8(); break;
            case 1: gen_op_neon_rshl_u8(); break;
            case 2: gen_op_neon_rshl_s16(); break;
            case 3: gen_op_neon_rshl_u16(); break;
            case 4: gen_op_neon_rshl_s32(); break;
            case 5: gen_op_neon_rshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_rshl_s64(); break;
            case 7: gen_op_neon_rshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VRSHL.64 not implemented");
#endif
            }
            break;
        case 11: /* VQRSHL */
            switch ((size << 1) | u) {
            case 0: gen_op_neon_qrshl_s8(); break;
            case 1: gen_op_neon_qrshl_u8(); break;
            case 2: gen_op_neon_qrshl_s16(); break;
            case 3: gen_op_neon_qrshl_u16(); break;
            case 4: gen_op_neon_qrshl_s32(); break;
            case 5: gen_op_neon_qrshl_u32(); break;
#if 0
            /* ??? Implementing these is tricky because the vector ops work
               on 32-bit pieces.  */
            case 6: gen_op_neon_qrshl_s64(); break;
            case 7: gen_op_neon_qrshl_u64(); break;
#else
            case 6: case 7: cpu_abort(env, "VQRSHL.64 not implemented");
#endif
            }
            break;
        case 12: /* VMAX */
            GEN_NEON_INTEGER_OP(max);
            break;
        case 13: /* VMIN */
            GEN_NEON_INTEGER_OP(min);
            break;
        case 14: /* VABD */
            GEN_NEON_INTEGER_OP(abd);
            break;
        case 15: /* VABA */
            GEN_NEON_INTEGER_OP(abd);
            NEON_GET_REG(T1, rd, pass);
            gen_neon_add(size);
            break;
        case 16:
            if (!u) { /* VADD */
                if (gen_neon_add(size))
                    return 1;
            } else { /* VSUB */
                switch (size) {
                case 0: gen_op_neon_sub_u8(); break;
                case 1: gen_op_neon_sub_u16(); break;
                case 2: gen_op_subl_T0_T1(); break;
                default: return 1;
                }
            }
            break;
        case 17:
            if (!u) { /* VTST */
                switch (size) {
                case 0: gen_op_neon_tst_u8(); break;
                case 1: gen_op_neon_tst_u16(); break;
                case 2: gen_op_neon_tst_u32(); break;
                default: return 1;
                }
            } else { /* VCEQ */
                switch (size) {
                case 0: gen_op_neon_ceq_u8(); break;
                case 1: gen_op_neon_ceq_u16(); break;
                case 2: gen_op_neon_ceq_u32(); break;
                default: return 1;
                }
            }
            break;
        case 18: /* Multiply.  */
            switch (size) {
            case 0: gen_op_neon_mul_u8(); break;
            case 1: gen_op_neon_mul_u16(); break;
            case 2: gen_op_mul_T0_T1(); break;
            default: return 1;
            }
            NEON_GET_REG(T1, rd, pass);
            if (u) { /* VMLS */
                switch (size) {
                case 0: gen_op_neon_rsb_u8(); break;
                case 1: gen_op_neon_rsb_u16(); break;
                case 2: gen_op_rsbl_T0_T1(); break;
                default: return 1;
                }
            } else { /* VMLA */
                gen_neon_add(size);
            }
            break;
        case 19: /* VMUL */
            if (u) { /* polynomial */
                gen_op_neon_mul_p8();
            } else { /* Integer */
                switch (size) {
                case 0: gen_op_neon_mul_u8(); break;
                case 1: gen_op_neon_mul_u16(); break;
                case 2: gen_op_mul_T0_T1(); break;
                default: return 1;
                }
            }
            break;
        case 20: /* VPMAX */
            GEN_NEON_INTEGER_OP(pmax);
            break;
        case 21: /* VPMIN */
            GEN_NEON_INTEGER_OP(pmin);
            break;
        case 22: /* Multiply high.  */
            if (!u) { /* VQDMULH */
                switch (size) {
                case 1: gen_op_neon_qdmulh_s16(); break;
                case 2: gen_op_neon_qdmulh_s32(); break;
                default: return 1;
                }
            } else { /* VQRDMULH */
                switch (size) {
                case 1: gen_op_neon_qrdmulh_s16(); break;
                case 2: gen_op_neon_qrdmulh_s32(); break;
                default: return 1;
                }
            }
            break;
        case 23: /* VPADD */
            if (u)
                return 1;
            switch (size) {
            case 0: gen_op_neon_padd_u8(); break;
            case 1: gen_op_neon_padd_u16(); break;
            case 2: gen_op_addl_T0_T1(); break;
            default: return 1;
            }
            break;
        case 26: /* Floating point arithmetic.  */
            switch ((u << 2) | size) {
            case 0: /* VADD */
                gen_op_neon_add_f32();
                break;
            case 2: /* VSUB */
                gen_op_neon_sub_f32();
                break;
            case 4: /* VPADD */
                gen_op_neon_add_f32();
                break;
            case 6: /* VABD */
                gen_op_neon_abd_f32();
                break;
            default:
                return 1;
            }
            break;
        case 27: /* Float multiply.  */
            gen_op_neon_mul_f32();
            if (!u) {
                NEON_GET_REG(T1, rd, pass);
                if (size == 0) {
                    gen_op_neon_add_f32();
                } else {
                    gen_op_neon_rsb_f32();
                }
            }
            break;
        case 28: /* Float compare.  */
            if (!u) {
                gen_op_neon_ceq_f32();
            } else {
                if (size == 0)
                    gen_op_neon_cge_f32();
                else
                    gen_op_neon_cgt_f32();
            }
            break;
        case 29: /* Float compare absolute.  */
            if (!u)
                return 1;
            if (size == 0)
                gen_op_neon_acge_f32();
            else
                gen_op_neon_acgt_f32();
            break;
        case 30: /* Float min/max.  */
            if (size == 0)
                gen_op_neon_max_f32();
            else
                gen_op_neon_min_f32();
            break;
        case 31:
            if (size == 0)
                gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
            else
                gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
            break;
        default:
            abort();
        }
        /* Save the result.  For elementwise operations we can put it
           straight into the destination register.  For pairwise operations
           we have to be careful to avoid clobbering the source operands.  */
        if (pairwise && rd == rm) {
            gen_neon_movl_scratch_T0(pass);
        } else {
            NEON_SET_REG(T0, rd, pass);
        }

        } /* for pass */
        if (pairwise && rd == rm) {
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                gen_neon_movl_T0_scratch(pass);
                NEON_SET_REG(T0, rd, pass);
            }
        }
    } else if (insn & (1 << 4)) {
        if ((insn & 0x00380080) != 0) {
            /* Two registers and shift.  */
            op = (insn >> 8) & 0xf;
            if (insn & (1 << 7)) {
                /* 64-bit shift.  */
                size = 3;
            } else {
                size = 2;
                while ((insn & (1 << (size + 19))) == 0)
                    size--;
            }
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
            /* To avoid excessive duplication of ops we implement shift
               by immediate using the variable shift operations.  */
            if (op < 8) {
                /* Shift by immediate:
                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
                /* Right shifts are encoded as N - shift, where N is the
                   element size in bits.  */
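                /* e.g. with 32-bit elements an encoded count of 24
                   becomes 24 - 32 = -8, and the variable shift helpers
                   treat a negative count as a right shift.  */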
                if (op <= 4)
                    shift = shift - (1 << (size + 3));
                else
                    shift++;
                if (size == 3) {
                    count = q + 1;
                } else {
                    count = q ? 4: 2;
                }
                switch (size) {
                case 0:
                    imm = (uint8_t) shift;
                    imm |= imm << 8;
                    imm |= imm << 16;
                    break;
                case 1:
                    imm = (uint16_t) shift;
                    imm |= imm << 16;
                    break;
                case 2:
                case 3:
                    imm = shift;
                    break;
                default:
                    abort();
                }

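                /* The lane-replicated (negative for right shifts) count
                   is handed to the variable-shift helpers in T1 for
                   elements of 32 bits or less, or sign-extended into
                   vfp.scratch for the 64-bit case.  */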
                for (pass = 0; pass < count; pass++) {
                    if (size < 3) {
                        /* Operands in T0 and T1.  */
                        gen_op_movl_T1_im(imm);
                        NEON_GET_REG(T0, rm, pass);
                    } else {
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
                        gen_op_movl_T0_im(imm);
                        gen_neon_movl_scratch_T0(0);
                        gen_op_movl_T0_im((int32_t)imm >> 31);
                        gen_neon_movl_scratch_T0(1);
                        NEON_GET_REG(T0, rm, pass * 2);
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
                    }

                    if (gen_neon_shift_im[op][u][size] == NULL)
                        return 1;
                    gen_neon_shift_im[op][u][size]();

                    if (op == 1 || op == 3) {
                        /* Accumulate.  */
                        if (size == 3) {
                            gen_neon_movl_scratch_T0(0);
                            gen_neon_movl_scratch_T1(1);
                            NEON_GET_REG(T0, rd, pass * 2);
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
                            gen_op_neon_addl_u64();
                        } else {
                            NEON_GET_REG(T1, rd, pass);
                            gen_neon_add(size);
                        }
                    } else if (op == 4 || (op == 5 && u)) {
                        /* Insert */
                        if (size == 3) {
                            cpu_abort(env, "VS[LR]I.64 not implemented");
                        }
                        switch (size) {
                        case 0:
                            if (op == 4)
                                imm = 0xff >> -shift;
                            else
                                imm = (uint8_t)(0xff << shift);
                            imm |= imm << 8;
                            imm |= imm << 16;
                            break;
                        case 1:
                            if (op == 4)
                                imm = 0xffff >> -shift;
                            else
                                imm = (uint16_t)(0xffff << shift);
                            imm |= imm << 16;
                            break;
                        case 2:
                            if (op == 4)
                                imm = 0xffffffffu >> -shift;
                            else
                                imm = 0xffffffffu << shift;
                            break;
                        default:
                            abort();
                        }
                        NEON_GET_REG(T1, rd, pass);
                        gen_op_movl_T2_im(imm);
                        gen_op_neon_bsl();
                    }
                    if (size == 3) {
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    } else {
                        NEON_SET_REG(T0, rd, pass);
                    }
                } /* for pass */
            } else if (op < 10) {
                /* Shift by immediate and narrow:
                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
                shift = shift - (1 << (size + 3));
                size++;
                if (size == 3) {
                    count = q + 1;
                } else {
                    count = q ? 4: 2;
                }
                switch (size) {
                case 1:
                    imm = (uint16_t) shift;
                    imm |= imm << 16;
                    break;
                case 2:
                case 3:
                    imm = shift;
                    break;
                default:
                    abort();
                }

                /* Processing MSB first means we need to do less shuffling at
                   the end.  */
                for (pass = count - 1; pass >= 0; pass--) {
                    /* Avoid clobbering the second operand before it has been
                       written.  */
                    n = pass;
                    if (rd == rm)
                        n ^= (count - 1);

                    if (size < 3) {
                        /* Operands in T0 and T1.  */
                        gen_op_movl_T1_im(imm);
                        NEON_GET_REG(T0, rm, n);
                    } else {
                        /* Operands in {T0, T1} and env->vfp.scratch.  */
                        gen_op_movl_T0_im(imm);
                        gen_neon_movl_scratch_T0(0);
                        gen_op_movl_T0_im((int32_t)imm >> 31);
                        gen_neon_movl_scratch_T0(1);
                        NEON_GET_REG(T0, rm, n * 2);
                        NEON_GET_REG(T1, rm, n * 2 + 1);
                    }

                    gen_neon_shift_im_narrow[q][u][size - 1]();

                    if (size < 3 && (pass & 1) == 0) {
                        gen_neon_movl_scratch_T0(0);
                    } else {
                        uint32_t offset;

                        if (size < 3)
                            gen_neon_movl_T1_scratch(0);

                        if (op == 8 && !u) {
                            gen_neon_narrow[size - 1]();
                        } else {
                            if (op == 8)
                                gen_neon_narrow_sats[size - 1]();
                            else
                                gen_neon_narrow_satu[size - 1]();
                        }
                        if (size == 3)
                            offset = neon_reg_offset(rd, n);
                        else
                            offset = neon_reg_offset(rd, n >> 1);
                        gen_op_neon_setreg_T0(offset);
                    }
                } /* for pass */
            } else if (op == 10) {
                /* VSHLL */
                if (q)
                    return 1;
                for (pass = 0; pass < 2; pass++) {
                    /* Avoid clobbering the input operand.  */
                    if (rd == rm)
                        n = 1 - pass;
                    else
                        n = pass;

                    NEON_GET_REG(T0, rm, n);
                    GEN_NEON_INTEGER_OP(widen);
                    if (shift != 0) {
                        /* The shift is less than the width of the source
                           type, so in some cases we can just
                           shift the whole register.  */
                        if (size == 1 || (size == 0 && u)) {
                            gen_op_shll_T0_im(shift);
                            gen_op_shll_T1_im(shift);
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_shll_u16(shift); break;
                            case 2: gen_op_neon_shll_u64(shift); break;
                            default: abort();
                            }
                        }
                    }
                    NEON_SET_REG(T0, rd, n * 2);
                    NEON_SET_REG(T1, rd, n * 2 + 1);
                }
            } else if (op == 15 || op == 16) {
                /* VCVT fixed-point.  */
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                    if (op & 1) {
                        if (u)
                            gen_vfp_ulto(0, shift);
                        else
                            gen_vfp_slto(0, shift);
                    } else {
                        if (u)
                            gen_vfp_toul(0, shift);
                        else
                            gen_vfp_tosl(0, shift);
                    }
                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
                }
            } else {
                return 1;
            }
        } else { /* (insn & 0x00380080) == 0 */
            int invert;

            op = (insn >> 8) & 0xf;
            /* One register and immediate.  */
            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
            invert = (insn & (1 << 5)) != 0;
            switch (op) {
            case 0: case 1:
                /* no-op */
                break;
            case 2: case 3:
                imm <<= 8;
                break;
            case 4: case 5:
                imm <<= 16;
                break;
            case 6: case 7:
                imm <<= 24;
                break;
            case 8: case 9:
                imm |= imm << 16;
                break;
            case 10: case 11:
                imm = (imm << 8) | (imm << 24);
                break;
            case 12:
                imm = (imm << 8) | 0xff;
                break;
            case 13:
                imm = (imm << 16) | 0xffff;
                break;
            case 14:
                imm |= (imm << 8) | (imm << 16) | (imm << 24);
                if (invert)
                    imm = ~imm;
                break;
            case 15:
                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                break;
            }
            if (invert)
                imm = ~imm;

            if (op != 14 || !invert)
                gen_op_movl_T1_im(imm);

            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                if (op & 1 && op < 12) {
                    NEON_GET_REG(T0, rd, pass);
                    if (invert) {
                        /* The immediate value has already been inverted, so
                           BIC becomes AND.  */
                        gen_op_andl_T0_T1();
                    } else {
                        gen_op_orl_T0_T1();
                    }
                    NEON_SET_REG(T0, rd, pass);
                } else {
                    if (op == 14 && invert) {
                        uint32_t tmp;
                        tmp = 0;
                        for (n = 0; n < 4; n++) {
                            if (imm & (1 << (n + (pass & 1) * 4)))
                                tmp |= 0xff << (n * 8);
                        }
                        gen_op_movl_T1_im(tmp);
                    }
                    /* VMOV, VMVN.  */
                    NEON_SET_REG(T1, rd, pass);
                }
            }
        }
    } else { /* (insn & 0x00800010 == 0x00800010) */
        if (size != 3) {
            op = (insn >> 8) & 0xf;
            if ((insn & (1 << 6)) == 0) {
                /* Three registers of different lengths.  */
                int src1_wide;
                int src2_wide;
                int prewiden;
                /* prewiden, src1_wide, src2_wide */
                static const int neon_3reg_wide[16][3] = {
                    {1, 0, 0}, /* VADDL */
                    {1, 1, 0}, /* VADDW */
                    {1, 0, 0}, /* VSUBL */
                    {1, 1, 0}, /* VSUBW */
                    {0, 1, 1}, /* VADDHN */
                    {0, 0, 0}, /* VABAL */
                    {0, 1, 1}, /* VSUBHN */
                    {0, 0, 0}, /* VABDL */
                    {0, 0, 0}, /* VMLAL */
                    {0, 0, 0}, /* VQDMLAL */
                    {0, 0, 0}, /* VMLSL */
                    {0, 0, 0}, /* VQDMLSL */
                    {0, 0, 0}, /* Integer VMULL */
                    {0, 0, 0}, /* VQDMULL */
                    {0, 0, 0}  /* Polynomial VMULL */
                };

                prewiden = neon_3reg_wide[op][0];
                src1_wide = neon_3reg_wide[op][1];
                src2_wide = neon_3reg_wide[op][2];

                /* Avoid overlapping operands.  Wide source operands are
                   always aligned so will never overlap with wide
                   destinations in problematic ways.  */
                if (rd == rm) {
                    NEON_GET_REG(T2, rm, 1);
                } else if (rd == rn) {
                    NEON_GET_REG(T2, rn, 1);
                }
                for (pass = 0; pass < 2; pass++) {
                    /* Load the second operand into env->vfp.scratch.
                       Also widen narrow operands.  */
                    if (pass == 1 && rd == rm) {
                        if (prewiden) {
                            gen_op_movl_T0_T2();
                        } else {
                            gen_op_movl_T1_T2();
                        }
                    } else {
                        if (src2_wide) {
                            NEON_GET_REG(T0, rm, pass * 2);
                            NEON_GET_REG(T1, rm, pass * 2 + 1);
                        } else {
                            if (prewiden) {
                                NEON_GET_REG(T0, rm, pass);
                            } else {
                                NEON_GET_REG(T1, rm, pass);
                            }
                        }
                    }
                    if (prewiden && !src2_wide) {
                        GEN_NEON_INTEGER_OP(widen);
                    }
                    if (prewiden || src2_wide) {
                        gen_neon_movl_scratch_T0(0);
                        gen_neon_movl_scratch_T1(1);
                    }

                    /* Load the first operand.  */
                    if (pass == 1 && rd == rn) {
                        gen_op_movl_T0_T2();
                    } else {
                        if (src1_wide) {
                            NEON_GET_REG(T0, rn, pass * 2);
                            NEON_GET_REG(T1, rn, pass * 2 + 1);
                        } else {
                            NEON_GET_REG(T0, rn, pass);
                        }
                    }
                    if (prewiden && !src1_wide) {
                        GEN_NEON_INTEGER_OP(widen);
                    }
                    switch (op) {
                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
                        switch (size) {
                        case 0: gen_op_neon_addl_u16(); break;
                        case 1: gen_op_neon_addl_u32(); break;
                        case 2: gen_op_neon_addl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
                        switch (size) {
                        case 0: gen_op_neon_subl_u16(); break;
                        case 1: gen_op_neon_subl_u32(); break;
                        case 2: gen_op_neon_subl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 5: case 7: /* VABAL, VABDL */
                        switch ((size << 1) | u) {
                        case 0: gen_op_neon_abdl_s16(); break;
                        case 1: gen_op_neon_abdl_u16(); break;
                        case 2: gen_op_neon_abdl_s32(); break;
                        case 3: gen_op_neon_abdl_u32(); break;
                        case 4: gen_op_neon_abdl_s64(); break;
                        case 5: gen_op_neon_abdl_u64(); break;
                        default: abort();
                        }
                        break;
                    case 8: case 9: case 10: case 11: case 12: case 13:
                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                        switch ((size << 1) | u) {
                        case 0: gen_op_neon_mull_s8(); break;
                        case 1: gen_op_neon_mull_u8(); break;
                        case 2: gen_op_neon_mull_s16(); break;
                        case 3: gen_op_neon_mull_u16(); break;
                        case 4: gen_op_imull_T0_T1(); break;
                        case 5: gen_op_mull_T0_T1(); break;
                        default: abort();
                        }
                        break;
                    case 14: /* Polynomial VMULL */
                        cpu_abort(env, "Polynomial VMULL not implemented");

                    default: /* 15 is RESERVED.  */
                        return 1;
                    }
                    if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                        /* Accumulate.  */
                        if (op == 10 || op == 11) {
                            switch (size) {
                            case 0: gen_op_neon_negl_u16(); break;
                            case 1: gen_op_neon_negl_u32(); break;
                            case 2: gen_op_neon_negl_u64(); break;
                            default: abort();
                            }
                        }

                        gen_neon_movl_scratch_T0(0);
                        gen_neon_movl_scratch_T1(1);

                        if (op != 13) {
                            NEON_GET_REG(T0, rd, pass * 2);
                            NEON_GET_REG(T1, rd, pass * 2 + 1);
                        }

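                        /* Combine the product (saved to scratch above) with
                           the value now in T0/T1.  VQDMLAL/VQDMLSL fall
                           through so the saturating add runs twice, which
                           supplies the doubling.  */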
                        switch (op) {
                        case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
                            switch (size) {
                            case 0: gen_op_neon_addl_u16(); break;
                            case 1: gen_op_neon_addl_u32(); break;
                            case 2: gen_op_neon_addl_u64(); break;
                            default: abort();
                            }
                            break;
                        case 9: case 11: /* VQDMLAL, VQDMLSL */
                            switch (size) {
                            case 1: gen_op_neon_addl_saturate_s32(); break;
                            case 2: gen_op_neon_addl_saturate_s64(); break;
                            default: abort();
                            }
                            /* Fall through.  */
                        case 13: /* VQDMULL */
                            switch (size) {
                            case 1: gen_op_neon_addl_saturate_s32(); break;
                            case 2: gen_op_neon_addl_saturate_s64(); break;
                            default: abort();
                            }
                            break;
                        default:
                            abort();
                        }
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    } else if (op == 4 || op == 6) {
                        /* Narrowing operation.  */
                        if (u) {
                            switch (size) {
                            case 0: gen_op_neon_narrow_high_u8(); break;
                            case 1: gen_op_neon_narrow_high_u16(); break;
                            case 2: gen_op_movl_T0_T1(); break;
                            default: abort();
                            }
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_narrow_high_round_u8(); break;
                            case 1: gen_op_neon_narrow_high_round_u16(); break;
                            case 2: gen_op_neon_narrow_high_round_u32(); break;
                            default: abort();
                            }
                        }
                        NEON_SET_REG(T0, rd, pass);
                    } else {
                        /* Write back the result.  */
                        NEON_SET_REG(T0, rd, pass * 2);
                        NEON_SET_REG(T1, rd, pass * 2 + 1);
                    }
                }
            } else {
                /* Two registers and a scalar.  */
                switch (op) {
                case 0: /* Integer VMLA scalar */
                case 1: /* Float VMLA scalar */
                case 4: /* Integer VMLS scalar */
                case 5: /* Float VMLS scalar */
                case 8: /* Integer VMUL scalar */
                case 9: /* Float VMUL scalar */
                case 12: /* VQDMULH scalar */
                case 13: /* VQRDMULH scalar */
                    gen_neon_get_scalar(size, rm);
                    gen_op_movl_T2_T0();
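                    /* The scalar is kept in T2; each pass after the first
                       restores it to T0 before multiplying by the next
                       element of rn.  */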
                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
                        if (pass != 0)
                            gen_op_movl_T0_T2();
                        NEON_GET_REG(T1, rn, pass);
                        if (op == 12) {
                            if (size == 1) {
                                gen_op_neon_qdmulh_s16();
                            } else {
                                gen_op_neon_qdmulh_s32();
                            }
                        } else if (op == 13) {
                            if (size == 1) {
                                gen_op_neon_qrdmulh_s16();
                            } else {
                                gen_op_neon_qrdmulh_s32();
                            }
                        } else if (op & 1) {
                            gen_op_neon_mul_f32();
                        } else {
                            switch (size) {
                            case 0: gen_op_neon_mul_u8(); break;
                            case 1: gen_op_neon_mul_u16(); break;
                            case 2: gen_op_mul_T0_T1(); break;
                            default: return 1;
                            }
                        }
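                        /* ops 0/1/4/5 accumulate into rd: a plain add for
                           VMLA, a reverse subtract for VMLS.  */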
                        if (op < 8) {
                            /* Accumulate.  */
                            NEON_GET_REG(T1, rd, pass);
                            switch (op) {
                            case 0:
                                gen_neon_add(size);
                                break;
                            case 1:
                                gen_op_neon_add_f32();
                                break;
                            case 4:
                                switch (size) {
                                case 0: gen_op_neon_rsb_u8(); break;
                                case 1: gen_op_neon_rsb_u16(); break;
                                case 2: gen_op_rsbl_T0_T1(); break;
                                default: return 1;
                                }
                                break;
                            case 5:
                                gen_op_neon_rsb_f32();
                                break;
                            default:
                                abort();
                            }
                        }
                        NEON_SET_REG(T0, rd, pass);
                    }
                    break;
                case 2: /* VMLAL scalar */
                case 3: /* VQDMLAL scalar */
                case 6: /* VMLSL scalar */
                case 7: /* VQDMLSL scalar */
                case 10: /* VMULL scalar */
                case 11: /* VQDMULL scalar */
                    if (rd == rn) {
                        /* Save overlapping operands before they are
                           clobbered.  */
                        NEON_GET_REG(T0, rn, 1);
                        gen_neon_movl_scratch_T0(2);
                    }
                    gen_neon_get_scalar(size, rm);