Statistics
| Branch: | Revision:

root / target-i386 / translate.c @ 8777643e

History | View | Annotate | Download (249.1 kB)

1
/*
2
 *  i386 translation
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
19
 */
20
#include <stdarg.h>
21
#include <stdlib.h>
22
#include <stdio.h>
23
#include <string.h>
24
#include <inttypes.h>
25
#include <signal.h>
26
#include <assert.h>
27

    
28
#include "cpu.h"
29
#include "exec-all.h"
30
#include "disas.h"
31
#include "tcg-op.h"
32

    
33
#include "helper.h"
34
#define GEN_HELPER 1
35
#include "helper.h"
36

    
37
#define PREFIX_REPZ   0x01
38
#define PREFIX_REPNZ  0x02
39
#define PREFIX_LOCK   0x04
40
#define PREFIX_DATA   0x08
41
#define PREFIX_ADR    0x10
42

    
43
#ifdef TARGET_X86_64
44
#define X86_64_ONLY(x) x
45
#define X86_64_DEF(x...) x
46
#define CODE64(s) ((s)->code64)
47
#define REX_X(s) ((s)->rex_x)
48
#define REX_B(s) ((s)->rex_b)
49
/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
50
#if 1
51
#define BUGGY_64(x) NULL
52
#endif
53
#else
54
#define X86_64_ONLY(x) NULL
55
#define X86_64_DEF(x...)
56
#define CODE64(s) 0
57
#define REX_X(s) 0
58
#define REX_B(s) 0
59
#endif
60

    
61
//#define MACRO_TEST   1
62

    
63
/* global register indexes */
64
static TCGv_ptr cpu_env;
65
static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
66
static TCGv_i32 cpu_cc_op;
67
/* local temps */
68
static TCGv cpu_T[2], cpu_T3;
69
/* local register indexes (only used inside old micro ops) */
70
static TCGv cpu_tmp0, cpu_tmp4;
71
static TCGv_ptr cpu_ptr0, cpu_ptr1;
72
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
73
static TCGv_i64 cpu_tmp1_i64;
74
static TCGv cpu_tmp5, cpu_tmp6;
75

    
76
#include "gen-icount.h"
77

    
78
#ifdef TARGET_X86_64
79
static int x86_64_hregs;
80
#endif
81

    
82
/* Per-translation-block decoder state.  One DisasContext lives for the
   duration of translating a single TB; it carries both the per-insn
   decode state (prefixes, current pc) and the CPU mode flags that were
   fixed when the TB was entered. */
typedef struct DisasContext {
    /* current insn context */
    int override; /* segment override register, -1 if no override */
    int prefix;   /* PREFIX_* bits seen on the current insn */
    int aflag, dflag; /* address/operand size of current insn */
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b; /* REX.X / REX.B prefix bits of current insn */
#endif
    int ss32;   /* 32 bit stack segment */
    int cc_op;  /* current CC operation */
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;    /* current privilege level */
    int iopl;   /* I/O privilege level from EFLAGS */
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;      /* cached CPUID feature bits */
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
} DisasContext;
119

    
120
static void gen_eob(DisasContext *s);
121
static void gen_jmp(DisasContext *s, target_ulong eip);
122
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
123

    
124
/* i386 arith/logic operations.  NOTE(review): ordering appears to follow
   the x86 /r sub-opcode encoding — confirm before reordering. */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops (OP_SAR pinned to 7 to match the encoding) */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

/* condition-code selectors for Jcc/SETcc style ops */
enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

/* operand size */
enum {
    OT_BYTE = 0,
    OT_WORD,
    OT_LONG,
    OT_QUAD,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};
182

    
183
/* The helpers below load immediate values into the T0/T1/A0 TCG
   temporaries.  The signed/unsigned 32-bit variants are identical in
   effect (tcg_gen_movi_tl takes a target_long); the separate names
   document the intended interpretation at the call site. */

/* T0 = 0 */
static inline void gen_op_movl_T0_0(void)
{
    tcg_gen_movi_tl(cpu_T[0], 0);
}

/* T0 = sign-interpreted 32-bit immediate */
static inline void gen_op_movl_T0_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

/* T0 = unsigned 32-bit immediate */
static inline void gen_op_movl_T0_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

/* T1 = sign-interpreted 32-bit immediate */
static inline void gen_op_movl_T1_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

/* T1 = unsigned 32-bit immediate */
static inline void gen_op_movl_T1_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

/* A0 = 32-bit immediate address */
static inline void gen_op_movl_A0_im(uint32_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}

#ifdef TARGET_X86_64
/* A0 = 64-bit immediate address */
static inline void gen_op_movq_A0_im(int64_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}
#endif

/* T0 = full target-width immediate */
static inline void gen_movtl_T0_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

/* T1 = full target-width immediate */
static inline void gen_movtl_T1_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}
229

    
230
/* T0 &= 0xffff — truncate T0 to a 16-bit value */
static inline void gen_op_andl_T0_ffff(void)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
}

/* T0 &= val */
static inline void gen_op_andl_T0_im(uint32_t val)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
}

/* T0 = T1 */
static inline void gen_op_movl_T0_T1(void)
{
    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
}

/* A0 &= 0xffff — used for 16-bit addressing modes */
static inline void gen_op_andl_A0_ffff(void)
{
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
}
249

    
250
#ifdef TARGET_X86_64
251

    
252
#define NB_OP_SIZES 4
253

    
254
#else /* !TARGET_X86_64 */
255

    
256
#define NB_OP_SIZES 3
257

    
258
#endif /* !TARGET_X86_64 */
259

    
260
#if defined(WORDS_BIGENDIAN)
261
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
262
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
263
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
264
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
265
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
266
#else
267
#define REG_B_OFFSET 0
268
#define REG_H_OFFSET 1
269
#define REG_W_OFFSET 0
270
#define REG_L_OFFSET 0
271
#define REG_LH_OFFSET 4
272
#endif
273

    
274
/* Store t0 into general register 'reg' with operand size 'ot'.
   Byte and word stores only touch the addressed sub-register; a 32-bit
   store additionally zeroes the upper half on x86_64 (x86 semantics). */
static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            /* low-byte register (also any reg when REX byte regs are active) */
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
        } else {
            /* regs 4..7 without REX: high byte (AH..BH) of regs[reg - 4] */
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    case OT_WORD:
        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case OT_QUAD:
        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}
306

    
307
/* regs[reg] <- T0, width 'ot' */
static inline void gen_op_mov_reg_T0(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
}

/* regs[reg] <- T1, width 'ot' */
static inline void gen_op_mov_reg_T1(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
}
316

    
317
/* Store A0 into general register 'reg'.  'size' is an address-size
   selector (0 = 16 bit, 1 = 32 bit, 2 = 64 bit), not an OT_* value. */
static inline void gen_op_mov_reg_A0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case 2:
        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}
342

    
343
/* Load general register 'reg' into t0.  Only the OT_BYTE high-byte
   case needs special handling (AH..BH live at byte 1 of regs[reg-4]);
   every other width loads the full register word — callers are
   expected to truncate/extend as needed. */
static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            goto std_case;
        } else {
            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    default:
    std_case:
        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
    }
}
359

    
360
/* T[t_index] <- regs[reg], width 'ot' */
static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
{
    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
}

/* A0 <- zero-extended low 32 bits of regs[reg] */
static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}
369

    
370
/* A0 += val for 32-bit addressing; the result is masked back to
   32 bits on x86_64 so address wrap-around is preserved. */
static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
/* A0 += val for 64-bit addressing (no masking) */
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif
384
    
385
/* A0 += val, dispatching on the code segment's bitness: 64-bit code
   uses the unmasked add, everything else masks to 32 bits. */
static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}
394

    
395
/* T0 += T1 */
static inline void gen_op_addl_T0_T1(void)
{
    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}

/* env->eip <- T0 */
static inline void gen_op_jmp_T0(void)
{
    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
}
404

    
405
/* regs[reg] += val with address-size 'size' (0/1/2 = 16/32/64 bit).
   The 16-bit case writes back only the low word; the 32-bit case masks
   the result on x86_64 (and the masked store also clears the upper
   half, matching x86 32-bit register-write semantics). */
static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}
430

    
431
/* regs[reg] += T0 — same width rules as gen_op_add_reg_im above,
   but with a runtime (register) addend instead of an immediate. */
static inline void gen_op_add_reg_T0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}
456

    
457
/* Emit code setting the runtime cc_op (flags computation selector). */
static inline void gen_op_set_cc_op(int32_t val)
{
    tcg_gen_movi_i32(cpu_cc_op, val);
}
461

    
462
/* A0 += regs[reg] << shift, masked to 32 bits on x86_64
   (scaled-index component of a 32-bit effective address). */
static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0) 
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}
472

    
473
/* A0 <- low 32 bits of segment 'reg' base */
static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
}

/* A0 += segment 'reg' base, masked to 32 bits on x86_64 */
static inline void gen_op_addl_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}
486

    
487
#ifdef TARGET_X86_64
488
static inline void gen_op_movq_A0_seg(int reg)
489
{
490
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
491
}
492

    
493
static inline void gen_op_addq_A0_seg(int reg)
494
{
495
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
496
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
497
}
498

    
499
static inline void gen_op_movq_A0_reg(int reg)
500
{
501
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
502
}
503

    
504
static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
505
{
506
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
507
    if (shift != 0) 
508
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
509
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
510
}
511
#endif
512

    
513
/* Sign-extending load: T0 <- *(A0), width from the low 2 bits of idx
   (0/1/2 = 8/16/32 bit), memory index from the upper bits.  'idx' is
   an (ot + mem_index-encoding) value as built by the callers below. */
static inline void gen_op_lds_T0_A0(int idx)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
        break;
    default:
    case 2:
        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
        break;
    }
}
529

    
530
/* Zero-extending load: t0 <- *(a0).  Width comes from the low 2 bits
   of idx (0/1/2/3 = 8/16/32/64 bit), memory index from the upper bits. */
static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8u(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16u(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_ld32u(t0, a0, mem_index);
        break;
    default:
    case 3:
        /* Should never happen on 32-bit targets.  */
#ifdef TARGET_X86_64
        tcg_gen_qemu_ld64(t0, a0, mem_index);
#endif
        break;
    }
}
552

    
553
/* XXX: always use ldu or lds */
/* T0 <- *(A0), zero-extended */
static inline void gen_op_ld_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

/* T0 <- *(A0), zero-extended (alias — same codegen as gen_op_ld_T0_A0) */
static inline void gen_op_ldu_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

/* T1 <- *(A0), zero-extended */
static inline void gen_op_ld_T1_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
}
568

    
569
/* Store: *(a0) <- t0, width/mem_index encoded in idx exactly like
   gen_op_ld_v above. */
static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_st8(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_st16(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_st32(t0, a0, mem_index);
        break;
    default:
    case 3:
        /* Should never happen on 32-bit targets.  */
#ifdef TARGET_X86_64
        tcg_gen_qemu_st64(t0, a0, mem_index);
#endif
        break;
    }
}
591

    
592
/* *(A0) <- T0 */
static inline void gen_op_st_T0_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[0], cpu_A0);
}

/* *(A0) <- T1 */
static inline void gen_op_st_T1_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[1], cpu_A0);
}
601

    
602
/* Emit code storing the constant 'pc' into env->eip (no TB exit). */
static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
}
607

    
608
/* Compute the source address of a string operation into A0:
   A0 = seg_base + (R)(E)SI, honouring segment overrides and the
   current address size (64/32/16 bit via s->aflag). */
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        /* 64-bit address: segment base only applies with an override */
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
    } else
#endif
    if (s->aflag) {
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
    } else {
        /* 16 address, always override */
        if (override < 0)
            override = R_DS;
        gen_op_movl_A0_reg(R_ESI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(override);
    }
}
642

    
643
/* Compute the destination address of a string operation into A0:
   A0 = ES base + (R)(E)DI.  Unlike the ESI variant there is no
   override handling — the destination segment is always ES. */
static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        gen_op_movq_A0_reg(R_EDI);
    } else
#endif
    if (s->aflag) {
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
    } else {
        gen_op_movl_A0_reg(R_EDI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(R_ES);
    }
}
663

    
664
static inline void gen_op_movl_T0_Dshift(int ot) 
665
{
666
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
667
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
668
};
669

    
670
/* Zero-extend 'reg' in place from operand size 'ot' to the full
   target width.  OT_QUAD (and any unknown size) is left unchanged. */
static void gen_extu(int ot, TCGv reg)
{
    if (ot == OT_BYTE) {
        tcg_gen_ext8u_tl(reg, reg);
    } else if (ot == OT_WORD) {
        tcg_gen_ext16u_tl(reg, reg);
    } else if (ot == OT_LONG) {
        tcg_gen_ext32u_tl(reg, reg);
    }
    /* anything wider: already full width, nothing to emit */
}
686

    
687
/* Sign-extend 'reg' in place from operand size 'ot' to the full
   target width.  OT_QUAD (and any unknown size) is left unchanged. */
static void gen_exts(int ot, TCGv reg)
{
    if (ot == OT_BYTE) {
        tcg_gen_ext8s_tl(reg, reg);
    } else if (ot == OT_WORD) {
        tcg_gen_ext16s_tl(reg, reg);
    } else if (ot == OT_LONG) {
        tcg_gen_ext32s_tl(reg, reg);
    }
    /* anything wider: already full width, nothing to emit */
}
703

    
704
/* Branch to label1 if (E/R)CX != 0, comparing only the part selected
   by the address size ('size' is aflag-style: 0/1/2, hence size + 1
   as the OT_* width passed to gen_extu). */
static inline void gen_op_jnz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

/* Branch to label1 if (E/R)CX == 0, same width handling as above. */
static inline void gen_op_jz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}
717

    
718
/* Emit the port-input helper call of the right width:
   v <- in{b,w,l}(port n).  ot outside 0..2 emits nothing. */
static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case 0: gen_helper_inb(v, n); break;
    case 1: gen_helper_inw(v, n); break;
    case 2: gen_helper_inl(v, n); break;
    }
}
727

    
728
/* Emit the port-output helper call of the right width:
   out{b,w,l}(port n, value v).  ot outside 0..2 emits nothing. */
static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case 0: gen_helper_outb(v, n); break;
    case 1: gen_helper_outw(v, n); break;
    case 2: gen_helper_outl(v, n); break;
    }
}
737

    
738
/* Emit the permission checks preceding an I/O instruction whose port
   number is in T0.  Two independent checks may fire:
   - TSS I/O bitmap check (check_io{b,w,l} helper) when in protected
     mode with CPL > IOPL, or in vm86 mode;
   - SVM IOIO intercept check when the guest runs under SVM.
   Both helpers can exit via an exception, so cc_op and eip are synced
   to the env first ('state_saved' ensures that happens exactly once). */
static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    int state_saved;
    target_ulong next_eip;

    state_saved = 0;
    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        state_saved = 1;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        switch (ot) {
        case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
        case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
        case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
        }
    }
    if(s->flags & HF_SVMI_MASK) {
        if (!state_saved) {
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_jmp_im(cur_eip);
            state_saved = 1;
        }
        /* encode the access size into the intercept info */
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}
771

    
772
/* Emit one MOVS iteration: load from [seg:(E)SI], store to [ES:(E)DI],
   then advance both index registers by the DF-scaled step. */
static inline void gen_movs(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
782

    
783
/* Flush the statically-known cc_op into the runtime cpu_cc_op
   register and mark the translator's view as dynamic.  A no-op when
   cc_op is already CC_OP_DYNAMIC. */
static inline void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op == CC_OP_DYNAMIC)
        return;
    gen_op_set_cc_op(s->cc_op);
    s->cc_op = CC_OP_DYNAMIC;
}
790

    
791
/* cc_dst <- T0; cc_src discarded (one-operand flag setters). */
static void gen_op_update1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

/* cc_src <- T1, cc_dst <- T0 (two-operand flag setters). */
static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

/* CMP: cc_src <- T1, cc_dst <- T0 - T1 (T0/T1 themselves untouched). */
static inline void gen_op_cmpl_T0_T1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

/* TEST: cc_dst <- T0 & T1, cc_src discarded. */
static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

/* NEG: cc_src <- -T0, cc_dst <- T0 (T0 holds the negated result). */
static void gen_op_update_neg_cc(void)
{
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
820

    
821
/* compute eflags.C to reg (via the cc_compute_c helper, dispatching
   on the runtime cpu_cc_op) */
static void gen_compute_eflags_c(TCGv reg)
{
    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

/* compute all eflags to cc_src (full flag materialization via the
   cc_compute_all helper) */
static void gen_compute_eflags(TCGv reg)
{
    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}
834

    
835
/* Slow-path condition evaluation: compute the truth value (0/1) of
   condition jcc_op into T0 by materializing EFLAGS and extracting the
   relevant bits (bit 11 = OF, 7 = SF, 6 = ZF, 2 = PF, CF via
   gen_compute_eflags_c). */
static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    switch(jcc_op) {
    case JCC_O:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_B:
        gen_compute_eflags_c(cpu_T[0]);
        break;
    case JCC_Z:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_BE:
        /* BE = CF | ZF: ZF shifted down to bit 0, OR'd with CF (bit 0) */
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_S:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_P:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_L:
        /* L = SF ^ OF */
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    default:
    case JCC_LE:
        /* LE = (SF ^ OF) | ZF */
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    }
}
888

    
889
/* return true if setcc_slow is not needed (WARNING: must be kept in
   sync with gen_jcc1) */
static int is_fast_jcc_case(DisasContext *s, int b)
{
    int jcc_op;
    jcc_op = (b >> 1) & 7;   /* condition code, low bit of b is the 'inv' flag */
    switch(s->cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:
        /* after a SUB, all conditions except O and P are fast */
        if (jcc_op == JCC_O || jcc_op == JCC_P)
            goto slow_jcc;
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:
        /* for these ops only Z and S can be read straight from cc_dst */
        if (jcc_op != JCC_Z && jcc_op != JCC_S)
            goto slow_jcc;
        break;
    default:
    slow_jcc:
        return 0;
    }
    return 1;
}
939

    
940
/* generate a conditional jump to label 'l1' according to jump opcode
941
   value 'b'. In the fast case, T0 is guaranted not to be used. */
942
static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
943
{
944
    int inv, jcc_op, size, cond;
945
    TCGv t0;
946

    
947
    inv = b & 1;
948
    jcc_op = (b >> 1) & 7;
949

    
950
    switch(cc_op) {
951
        /* we optimize the cmp/jcc case */
952
    case CC_OP_SUBB:
953
    case CC_OP_SUBW:
954
    case CC_OP_SUBL:
955
    case CC_OP_SUBQ:
956
        
957
        size = cc_op - CC_OP_SUBB;
958
        switch(jcc_op) {
959
        case JCC_Z:
960
        fast_jcc_z:
961
            switch(size) {
962
            case 0:
963
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
964
                t0 = cpu_tmp0;
965
                break;
966
            case 1:
967
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
968
                t0 = cpu_tmp0;
969
                break;
970
#ifdef TARGET_X86_64
971
            case 2:
972
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
973
                t0 = cpu_tmp0;
974
                break;
975
#endif
976
            default:
977
                t0 = cpu_cc_dst;
978
                break;
979
            }
980
            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
981
            break;
982
        case JCC_S:
983
        fast_jcc_s:
984
            switch(size) {
985
            case 0:
986
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
987
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
988
                                   0, l1);
989
                break;
990
            case 1:
991
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
992
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
993
                                   0, l1);
994
                break;
995
#ifdef TARGET_X86_64
996
            case 2:
997
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
998
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
999
                                   0, l1);
1000
                break;
1001
#endif
1002
            default:
1003
                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
1004
                                   0, l1);
1005
                break;
1006
            }
1007
            break;
1008
            
1009
        case JCC_B:
1010
            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
1011
            goto fast_jcc_b;
1012
        case JCC_BE:
1013
            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
1014
        fast_jcc_b:
1015
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
1016
            switch(size) {
1017
            case 0:
1018
                t0 = cpu_tmp0;
1019
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
1020
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
1021
                break;
1022
            case 1:
1023
                t0 = cpu_tmp0;
1024
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
1025
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
1026
                break;
1027
#ifdef TARGET_X86_64
1028
            case 2:
1029
                t0 = cpu_tmp0;
1030
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
1031
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
1032
                break;
1033
#endif
1034
            default:
1035
                t0 = cpu_cc_src;
1036
                break;
1037
            }
1038
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
1039
            break;
1040
            
1041
        case JCC_L:
1042
            cond = inv ? TCG_COND_GE : TCG_COND_LT;
1043
            goto fast_jcc_l;
1044
        case JCC_LE:
1045
            cond = inv ? TCG_COND_GT : TCG_COND_LE;
1046
        fast_jcc_l:
1047
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
1048
            switch(size) {
1049
            case 0:
1050
                t0 = cpu_tmp0;
1051
                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
1052
                tcg_gen_ext8s_tl(t0, cpu_cc_src);
1053
                break;
1054
            case 1:
1055
                t0 = cpu_tmp0;
1056
                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
1057
                tcg_gen_ext16s_tl(t0, cpu_cc_src);
1058
                break;
1059
#ifdef TARGET_X86_64
1060
            case 2:
1061
                t0 = cpu_tmp0;
1062
                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
1063
                tcg_gen_ext32s_tl(t0, cpu_cc_src);
1064
                break;
1065
#endif
1066
            default:
1067
                t0 = cpu_cc_src;
1068
                break;
1069
            }
1070
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
1071
            break;
1072
            
1073
        default:
1074
            goto slow_jcc;
1075
        }
1076
        break;
1077
        
1078
        /* some jumps are easy to compute */
1079
    case CC_OP_ADDB:
1080
    case CC_OP_ADDW:
1081
    case CC_OP_ADDL:
1082
    case CC_OP_ADDQ:
1083
        
1084
    case CC_OP_ADCB:
1085
    case CC_OP_ADCW:
1086
    case CC_OP_ADCL:
1087
    case CC_OP_ADCQ:
1088
        
1089
    case CC_OP_SBBB:
1090
    case CC_OP_SBBW:
1091
    case CC_OP_SBBL:
1092
    case CC_OP_SBBQ:
1093
        
1094
    case CC_OP_LOGICB:
1095
    case CC_OP_LOGICW:
1096
    case CC_OP_LOGICL:
1097
    case CC_OP_LOGICQ:
1098
        
1099
    case CC_OP_INCB:
1100
    case CC_OP_INCW:
1101
    case CC_OP_INCL:
1102
    case CC_OP_INCQ:
1103
        
1104
    case CC_OP_DECB:
1105
    case CC_OP_DECW:
1106
    case CC_OP_DECL:
1107
    case CC_OP_DECQ:
1108
        
1109
    case CC_OP_SHLB:
1110
    case CC_OP_SHLW:
1111
    case CC_OP_SHLL:
1112
    case CC_OP_SHLQ:
1113
        
1114
    case CC_OP_SARB:
1115
    case CC_OP_SARW:
1116
    case CC_OP_SARL:
1117
    case CC_OP_SARQ:
1118
        switch(jcc_op) {
1119
        case JCC_Z:
1120
            size = (cc_op - CC_OP_ADDB) & 3;
1121
            goto fast_jcc_z;
1122
        case JCC_S:
1123
            size = (cc_op - CC_OP_ADDB) & 3;
1124
            goto fast_jcc_s;
1125
        default:
1126
            goto slow_jcc;
1127
        }
1128
        break;
1129
    default:
1130
    slow_jcc:
1131
        gen_setcc_slow_T0(s, jcc_op);
1132
        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
1133
                           cpu_T[0], 0, l1);
1134
        break;
1135
    }
1136
}
1137

    
1138
/* XXX: does not work with gdbstub "ice" single step - not a
1139
   serious problem */
1140
static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1141
{
1142
    int l1, l2;
1143

    
1144
    l1 = gen_new_label();
1145
    l2 = gen_new_label();
1146
    gen_op_jnz_ecx(s->aflag, l1);
1147
    gen_set_label(l2);
1148
    gen_jmp_tb(s, next_eip, 1);
1149
    gen_set_label(l1);
1150
    return l2;
1151
}
1152

    
1153
/* STOS: store AL/AX/EAX/RAX at ES:[EDI], then advance EDI by the
   operand size (direction taken from EFLAGS.DF via Dshift). */
static inline void gen_stos(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);   /* T0 = EAX */
    gen_string_movl_A0_EDI(s);              /* A0 = ES:EDI */
    gen_op_st_T0_A0(ot + s->mem_index);     /* [A0] = T0 */
    gen_op_movl_T0_Dshift(ot);              /* T0 = +/- element size */
    gen_op_add_reg_T0(s->aflag, R_EDI);     /* EDI += T0 */
}
/* LODS: load an element from DS:[ESI] into AL/AX/EAX/RAX, then
   advance ESI by the operand size (direction from EFLAGS.DF). */
static inline void gen_lods(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);              /* A0 = seg:ESI */
    gen_op_ld_T0_A0(ot + s->mem_index);     /* T0 = [A0] */
    gen_op_mov_reg_T0(ot, R_EAX);           /* EAX = T0 (ot-sized) */
    gen_op_movl_T0_Dshift(ot);              /* T0 = +/- element size */
    gen_op_add_reg_T0(s->aflag, R_ESI);     /* ESI += T0 */
}
/* SCAS: compare AL/AX/EAX/RAX with the element at ES:[EDI] (flags
   only, via the lazy cc machinery), then advance EDI.  Note: the
   caller is responsible for setting s->cc_op (see GEN_REPZ2). */
static inline void gen_scas(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);   /* T0 = EAX */
    gen_string_movl_A0_EDI(s);              /* A0 = ES:EDI */
    gen_op_ld_T1_A0(ot + s->mem_index);     /* T1 = [A0] */
    gen_op_cmpl_T0_T1_cc();                 /* set cc_src/cc_dst for T0 - T1 */
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
/* CMPS: compare the element at DS:[ESI] with the one at ES:[EDI]
   (flags only), then advance both ESI and EDI.  Note: the caller is
   responsible for setting s->cc_op (see GEN_REPZ2). */
static inline void gen_cmps(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);     /* T0 = [seg:ESI] */
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);     /* T1 = [ES:EDI] */
    gen_op_cmpl_T0_T1_cc();                 /* set cc_src/cc_dst for T0 - T1 */
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}
/* INS: read one element from I/O port DX into ES:[EDI], then advance
   EDI.  Bracketed with gen_io_start/gen_io_end when icount is in use
   so the I/O access is properly accounted. */
static inline void gen_ins(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    gen_op_movl_T0_0();
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);             /* T1 = DX */
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);  /* port is 16 bits */
    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);   /* T0 = in(port) */
    gen_op_st_T0_A0(ot + s->mem_index);               /* real store */
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    if (use_icount)
        gen_io_end();
}
/* OUTS: write the element at DS:[ESI] to I/O port DX, then advance
   ESI.  Bracketed with gen_io_start/gen_io_end when icount is in use
   so the I/O access is properly accounted. */
static inline void gen_outs(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);               /* T0 = [seg:ESI] */

    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);             /* T1 = DX */
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);  /* port is 16 bits */
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);   /* out(port, data) */

    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    if (use_icount)
        gen_io_end();
}
/* same method as Valgrind : we generate jumps to current or next
   instruction */
/* Expand to gen_repz_<op>(): one REP iteration of string op <op>.
   Emits the ECX == 0 exit test, one iteration of the op, ECX -= 1,
   then jumps back to cur_eip so the next iteration is re-translated
   (or re-entered via the TB chaining done by gen_jmp). */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}
/* Like GEN_REPZ, but for ops that also terminate on a ZF condition
   (REPZ/REPNZ SCAS and CMPS): after the iteration the SUB flags are
   materialized and a conditional branch on Z (inverted by nz) exits
   the loop early. */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}
/* REP MOVS/STOS/LODS/INS/OUTS, and REPZ/REPNZ SCAS/CMPS (the latter
   two also test ZF each iteration, hence GEN_REPZ2). */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
/* Dispatch an x87 arithmetic helper acting on ST0 and FT0, selected
   by the 3-bit /reg field of the opcode.  Ops 2 and 3 (FCOM/FCOMP)
   emit the same compare; the pop for FCOMP is emitted by the caller. */
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0();
        break;
    case 1:
        gen_helper_fmul_ST0_FT0();
        break;
    case 2:
    case 3:
        gen_helper_fcom_ST0_FT0();
        break;
    case 4:
        gen_helper_fsub_ST0_FT0();
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0();
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0();
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0();
        break;
    }
}
/* NOTE the exception in "r" op ordering */
1291
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1292
{
1293
    TCGv_i32 tmp = tcg_const_i32(opreg);
1294
    switch (op) {
1295
    case 0: gen_helper_fadd_STN_ST0(tmp); break;
1296
    case 1: gen_helper_fmul_STN_ST0(tmp); break;
1297
    case 4: gen_helper_fsubr_STN_ST0(tmp); break;
1298
    case 5: gen_helper_fsub_STN_ST0(tmp); break;
1299
    case 6: gen_helper_fdivr_STN_ST0(tmp); break;
1300
    case 7: gen_helper_fdiv_STN_ST0(tmp); break;
1301
    }
1302
}
1303

    
1304
/* Emit one group-1 ALU operation: dest = dest <op> T1, updating the
   x86 flags lazily through cpu_cc_src/cpu_cc_dst/cc_op.
   if d == OR_TMP0, it means memory operand (address in A0);
   otherwise d is the destination register. */
static void gen_op(DisasContext *s1, int op, int ot, int d)
{
    /* fetch the destination operand into T0 */
    if (d != OR_TMP0) {
        gen_op_mov_TN_reg(ot, 0, d);
    } else {
        gen_op_ld_T0_A0(ot + s1->mem_index);
    }
    switch(op) {
    case OP_ADCL:
        /* ADC consumes the incoming carry, so materialize the flags
           state first and extract CF */
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);         /* tmp4 = input CF (0/1) */
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        /* pick ADD vs ADC lazy-flags mode at run time from the input
           carry: cc_op = CC_OP_ADDB + ot + 4*CF (each CC_OP group is
           4 entries, B/W/L/Q) */
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_SBBL:
        /* same scheme as ADC, with subtraction and the SUB/SBB groups */
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);         /* tmp4 = input CF (0/1) */
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_ADDL:
        gen_op_addl_T0_T1();
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();                    /* cc_src = T1, cc_dst = T0 */
        s1->cc_op = CC_OP_ADDB + ot;
        break;
    case OP_SUBL:
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();                    /* cc_src = T1, cc_dst = T0 */
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();                    /* cc_dst = T0 */
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_CMPL:
        /* CMP: flags only, no writeback */
        gen_op_cmpl_T0_T1_cc();
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    }
}
/* if d == OR_TMP0, it means memory operand (address in A0) */
1401
static void gen_inc(DisasContext *s1, int ot, int d, int c)
1402
{
1403
    if (d != OR_TMP0)
1404
        gen_op_mov_TN_reg(ot, 0, d);
1405
    else
1406
        gen_op_ld_T0_A0(ot + s1->mem_index);
1407
    if (s1->cc_op != CC_OP_DYNAMIC)
1408
        gen_op_set_cc_op(s1->cc_op);
1409
    if (c > 0) {
1410
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
1411
        s1->cc_op = CC_OP_INCB + ot;
1412
    } else {
1413
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
1414
        s1->cc_op = CC_OP_DECB + ot;
1415
    }
1416
    if (d != OR_TMP0)
1417
        gen_op_mov_reg_T0(ot, d);
1418
    else
1419
        gen_op_st_T0_A0(ot + s1->mem_index);
1420
    gen_compute_eflags_c(cpu_cc_src);
1421
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1422
}
1423

    
1424
/* Emit SHL/SHR/SAR of the operand by the variable count in T1.
   op1 selects a register destination, or memory at A0 when
   op1 == OR_TMP0.  is_right selects right shifts; is_arith selects
   SAR over SHR.  Flags are only updated when the masked count is
   non-zero, decided at run time with a branch. */
static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
                            int is_right, int is_arith)
{
    target_ulong mask;
    int shift_label;
    TCGv t0, t1;

    /* count is masked to 6 bits for 64-bit operands, 5 otherwise */
    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    /* tmp5 = count - 1: used to capture the last bit shifted out (CF) */
    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);                       /* sign-extend for SAR */
            tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);   /* T3 = result of count-1 shift */
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);                       /* zero-extend for SHR */
            tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);
        
    /* update eflags if non zero shift */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* XXX: inefficient */
    t0 = tcg_temp_local_new();
    t1 = tcg_temp_local_new();

    /* local temps survive the branch below */
    tcg_gen_mov_tl(t0, cpu_T[0]);
    tcg_gen_mov_tl(t1, cpu_T3);

    shift_label = gen_new_label();
    /* count == 0: flags are unchanged, skip the update */
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);

    tcg_gen_mov_tl(cpu_cc_src, t1);   /* count-1 shift result: holds CF */
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right)
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    else
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
        
    gen_set_label(shift_label);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
}
/* Emit SHL/SHR/SAR of the operand by an immediate count (op2).
   op1 selects a register destination, or memory at A0 when
   op1 == OR_TMP0.  Flags are left untouched when the masked count
   is zero, matching the hardware behaviour. */
static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int count = op2 & (ot == OT_QUAD ? 0x3f : 0x1f);

    /* fetch the operand into T0 */
    if (op1 == OR_TMP0) {
        gen_op_ld_T0_A0(ot + s->mem_index);
    } else {
        gen_op_mov_TN_reg(ot, 0, op1);
    }

    if (count != 0) {
        /* tmp4 receives the (count-1)-shift result, which carries the
           bit that ends up in CF */
        if (!is_right) {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], count - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], count);
        } else if (is_arith) {
            gen_exts(ot, cpu_T[0]);             /* sign-extend for SAR */
            tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], count - 1);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], count);
        } else {
            gen_extu(ot, cpu_T[0]);             /* zero-extend for SHR */
            tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], count - 1);
            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], count);
        }
    }

    /* write the result back */
    if (op1 == OR_TMP0) {
        gen_op_st_T0_A0(ot + s->mem_index);
    } else {
        gen_op_mov_reg_T0(ot, op1);
    }

    /* flags only change for a non-zero shift count */
    if (count != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        s->cc_op = (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot;
    }
}
/* Signed-direction shift: left by arg2 when arg2 >= 0, otherwise
   right by -arg2.  Used below to align a bit into the CC_O position
   regardless of operand width. */
static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
{
    if (arg2 < 0) {
        tcg_gen_shri_tl(ret, arg1, -arg2);
    } else {
        tcg_gen_shli_tl(ret, arg1, arg2);
    }
}
/* Emit ROL/ROR of the operand by the variable count in T1.  op1
   selects a register destination, or memory at A0 when
   op1 == OR_TMP0.  Flags (CF and OF only) are updated at run time
   only when the masked count is non-zero.
   XXX: add faster immediate case */
static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, 
                          int is_right)
{
    target_ulong mask;
    int label1, label2, data_bits;
    TCGv t0, t1, t2, a0;

    /* XXX: inefficient, but we must use local temps */
    t0 = tcg_temp_local_new();      /* operand / result */
    t1 = tcg_temp_local_new();      /* masked rotate count */
    t2 = tcg_temp_local_new();      /* original value, for the OF xor */
    a0 = tcg_temp_local_new();      /* saved address for the writeback */

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_mov_tl(t1, cpu_T[1]);

    tcg_gen_andi_tl(t1, t1, mask);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
    
    /* reduce the count modulo the data width for sub-long operands */
    if (ot <= OT_WORD)
        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
    else
        tcg_gen_mov_tl(cpu_tmp0, t1);
    
    gen_extu(ot, t0);
    tcg_gen_mov_tl(t2, t0);         /* keep original value for OF */

    data_bits = 8 << ot;
    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
       fix TCG definition) */
    /* rotate = (x >> n) | (x << (width - n)), or the mirror for ROL */
    if (is_right) {
        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
    } else {
        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
    }
    tcg_gen_or_tl(t0, t0, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }
    
    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* count == 0: flags unchanged, skip the update */
    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);

    gen_compute_eflags(cpu_cc_src);
    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
    /* OF = MSB changed: align bit (data_bits-1) of (old ^ new) onto
       the CC_O bit position (bit 11) */
    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
    /* CF = bit rotated around: MSB of result for ROR, LSB for ROL */
    if (is_right) {
        tcg_gen_shri_tl(t0, t0, data_bits - 1);
    }
    tcg_gen_andi_tl(t0, t0, CC_C);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
    
    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
        
    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}
/* XXX: add faster immediate = 1 case */
1652
static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
1653
                           int is_right)
1654
{
1655
    int label1;
1656

    
1657
    if (s->cc_op != CC_OP_DYNAMIC)
1658
        gen_op_set_cc_op(s->cc_op);
1659

    
1660
    /* load */
1661
    if (op1 == OR_TMP0)
1662
        gen_op_ld_T0_A0(ot + s->mem_index);
1663
    else
1664
        gen_op_mov_TN_reg(ot, 0, op1);
1665
    
1666
    if (is_right) {
1667
        switch (ot) {
1668
        case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1669
        case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1670
        case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1671
#ifdef TARGET_X86_64
1672
        case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1673
#endif
1674
        }
1675
    } else {
1676
        switch (ot) {
1677
        case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1678
        case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1679
        case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1680
#ifdef TARGET_X86_64
1681
        case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
1682
#endif
1683
        }
1684
    }
1685
    /* store */
1686
    if (op1 == OR_TMP0)
1687
        gen_op_st_T0_A0(ot + s->mem_index);
1688
    else
1689
        gen_op_mov_reg_T0(ot, op1);
1690

    
1691
    /* update eflags */
1692
    label1 = gen_new_label();
1693
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
1694

    
1695
    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
1696
    tcg_gen_discard_tl(cpu_cc_dst);
1697
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
1698
        
1699
    gen_set_label(label1);
1700
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
1701
}
1702

    
1703
/* Emit SHLD/SHRD (double-precision shift): shift the operand by the
   count in T3, filling the vacated bits from T1.  op1 selects a
   register destination, or memory at A0 when op1 == OR_TMP0.  Flags
   are updated at run time only when the masked count is non-zero.
   XXX: add faster immediate case */
static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, 
                                int is_right)
{
    int label1, label2, data_bits;
    target_ulong mask;
    TCGv t0, t1, t2, a0;

    t0 = tcg_temp_local_new();      /* operand / result */
    t1 = tcg_temp_local_new();      /* fill value, then CF source */
    t2 = tcg_temp_local_new();      /* masked shift count */
    a0 = tcg_temp_local_new();      /* saved address for the writeback */

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);

    tcg_gen_mov_tl(t1, cpu_T[1]);
    tcg_gen_mov_tl(t2, cpu_T3);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
    
    /* tmp5 = count - 1: the (count-1)-shift result supplies CF */
    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
    if (ot == OT_WORD) {
        /* Note: we implement the Intel behaviour for shift count > 16 */
        if (is_right) {
            /* build a 32-bit value with the fill word above the operand */
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
            tcg_gen_or_tl(t0, t0, cpu_tmp0);
            tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
            
            /* only needed if count > 16, but a test would complicate */
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);

            tcg_gen_shr_tl(t0, t0, t2);

            tcg_gen_or_tl(t0, t0, cpu_tmp0);
        } else {
            /* XXX: not optimal */
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t1, t1, t0);
            tcg_gen_ext32u_tl(t1, t1);
            
            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
            tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);

            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    } else {
        data_bits = 8 << ot;
        if (is_right) {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);   /* CF source */

            /* result = (t0 >> count) | (t1 << (width - count)) */
            tcg_gen_shr_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
            
        } else {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t1, t1);

            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);   /* CF source */
            
            /* result = (t0 << count) | (t1 >> (width - count)) */
            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    }
    tcg_gen_mov_tl(t1, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }
    
    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* count == 0: flags unchanged, skip the update */
    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right) {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    } else {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
    }
    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}
/* Dispatch a group-2 shift/rotate op on operand d with the count
   taken from register s, or from T1 directly when s == OR_TMP1. */
static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_TN_reg(ot, 1, s);
    switch (op) {
    case OP_ROL:
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, op == OP_ROR);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, op == OP_RCR);
        break;
    }
}
/* Dispatch a group-2 shift/rotate op with an immediate count c.
   Plain shifts use the optimized immediate path; rotates fall back
   to the generic T1-count path. */
static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
{
    if (op == OP_SHL || op == OP_SHL1) {
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
    } else if (op == OP_SHR) {
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
    } else if (op == OP_SAR) {
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
    } else {
        /* currently not optimized: route the count through T1 */
        gen_op_movl_T1_im(c);
        gen_shift(s1, op, ot, d, OR_TMP1);
    }
}
static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
1884
{
1885
    target_long disp;
1886
    int havesib;
1887
    int base;
1888
    int index;
1889
    int scale;
1890
    int opreg;
1891
    int mod, rm, code, override, must_add_seg;
1892

    
1893
    override = s->override;
1894
    must_add_seg = s->addseg;
1895
    if (override >= 0)
1896
        must_add_seg = 1;
1897
    mod = (modrm >> 6) & 3;
1898
    rm = modrm & 7;
1899

    
1900
    if (s->aflag) {
1901

    
1902
        havesib = 0;
1903
        base = rm;
1904
        index = 0;
1905
        scale = 0;
1906

    
1907
        if (base == 4) {
1908
            havesib = 1;
1909
            code = ldub_code(s->pc++);
1910
            scale = (code >> 6) & 3;
1911
            index = ((code >> 3) & 7) | REX_X(s);
1912
            base = (code & 7);
1913
        }
1914
        base |= REX_B(s);
1915

    
1916
        switch (mod) {
1917
        case 0:
1918
            if ((base & 7) == 5) {
1919
                base = -1;
1920
                disp = (int32_t)ldl_code(s->pc);
1921
                s->pc += 4;
1922
                if (CODE64(s) && !havesib) {
1923
                    disp += s->pc + s->rip_offset;
1924
                }
1925
            } else {
1926
                disp = 0;
1927
            }
1928
            break;
1929
        case 1:
1930
            disp = (int8_t)ldub_code(s->pc++);
1931
            break;
1932
        default:
1933
        case 2:
1934
            disp = ldl_code(s->pc);
1935
            s->pc += 4;
1936
            break;
1937
        }
1938

    
1939
        if (base >= 0) {
1940
            /* for correct popl handling with esp */
1941
            if (base == 4 && s->popl_esp_hack)
1942
                disp += s->popl_esp_hack;
1943
#ifdef TARGET_X86_64
1944
            if (s->aflag == 2) {
1945
                gen_op_movq_A0_reg(base);
1946
                if (disp != 0) {
1947
                    gen_op_addq_A0_im(disp);
1948
                }
1949
            } else
1950
#endif
1951
            {
1952
                gen_op_movl_A0_reg(base);
1953
                if (disp != 0)
1954
                    gen_op_addl_A0_im(disp);
1955
            }
1956
        } else {
1957
#ifdef TARGET_X86_64
1958
            if (s->aflag == 2) {
1959
                gen_op_movq_A0_im(disp);
1960
            } else
1961
#endif
1962
            {
1963
                gen_op_movl_A0_im(disp);
1964
            }
1965
        }
1966
        /* XXX: index == 4 is always invalid */
1967
        if (havesib && (index != 4 || scale != 0)) {
1968
#ifdef TARGET_X86_64
1969
            if (s->aflag == 2) {
1970
                gen_op_addq_A0_reg_sN(scale, index);
1971
            } else
1972
#endif
1973
            {
1974
                gen_op_addl_A0_reg_sN(scale, index);
1975
            }
1976
        }
1977
        if (must_add_seg) {
1978
            if (override < 0) {
1979
                if (base == R_EBP || base == R_ESP)
1980
                    override = R_SS;
1981
                else
1982
                    override = R_DS;
1983
            }
1984
#ifdef TARGET_X86_64
1985
            if (s->aflag == 2) {
1986
                gen_op_addq_A0_seg(override);
1987
            } else
1988
#endif
1989
            {
1990
                gen_op_addl_A0_seg(override);
1991
            }
1992
        }
1993
    } else {
1994
        switch (mod) {
1995
        case 0:
1996
            if (rm == 6) {
1997
                disp = lduw_code(s->pc);
1998
                s->pc += 2;
1999
                gen_op_movl_A0_im(disp);
2000
                rm = 0; /* avoid SS override */
2001
                goto no_rm;
2002
            } else {
2003
                disp = 0;
2004
            }
2005
            break;
2006
        case 1:
2007
            disp = (int8_t)ldub_code(s->pc++);
2008
            break;
2009
        default:
2010
        case 2:
2011
            disp = lduw_code(s->pc);
2012
            s->pc += 2;
2013
            break;
2014
        }
2015
        switch(rm) {
2016
        case 0:
2017
            gen_op_movl_A0_reg(R_EBX);
2018
            gen_op_addl_A0_reg_sN(0, R_ESI);
2019
            break;
2020
        case 1:
2021
            gen_op_movl_A0_reg(R_EBX);
2022
            gen_op_addl_A0_reg_sN(0, R_EDI);
2023
            break;
2024
        case 2:
2025
            gen_op_movl_A0_reg(R_EBP);
2026
            gen_op_addl_A0_reg_sN(0, R_ESI);
2027
            break;
2028
        case 3:
2029
            gen_op_movl_A0_reg(R_EBP);
2030
            gen_op_addl_A0_reg_sN(0, R_EDI);
2031
            break;
2032
        case 4:
2033
            gen_op_movl_A0_reg(R_ESI);
2034
            break;
2035
        case 5:
2036
            gen_op_movl_A0_reg(R_EDI);
2037
            break;
2038
        case 6:
2039
            gen_op_movl_A0_reg(R_EBP);
2040
            break;
2041
        default:
2042
        case 7:
2043
            gen_op_movl_A0_reg(R_EBX);
2044
            break;
2045
        }
2046
        if (disp != 0)
2047
            gen_op_addl_A0_im(disp);
2048
        gen_op_andl_A0_ffff();
2049
    no_rm:
2050
        if (must_add_seg) {
2051
            if (override < 0) {
2052
                if (rm == 2 || rm == 3 || rm == 6)
2053
                    override = R_SS;
2054
                else
2055
                    override = R_DS;
2056
            }
2057
            gen_op_addl_A0_seg(override);
2058
        }
2059
    }
2060

    
2061
    opreg = OR_A0;
2062
    disp = 0;
2063
    *reg_ptr = opreg;
2064
    *offset_ptr = disp;
2065
}
2066

    
2067
/* Decode and skip the memory-operand bytes of a modrm encoding without
   generating any code.  Used for hint/NOP opcodes whose operand bytes
   must still be consumed from the instruction stream. */
static void gen_nop_modrm(DisasContext *s, int modrm)
{
    int mod, rm, base, code;

    mod = (modrm >> 6) & 3;
    if (mod == 3)
        return;                 /* register operand: no extra bytes */
    rm = modrm & 7;

    if (s->aflag) {
        /* 32/64-bit addressing */
        base = rm;

        if (base == 4) {
            /* a SIB byte follows the modrm byte */
            code = ldub_code(s->pc++);
            base = (code & 7);
        }

        switch (mod) {
        case 0:
            if (base == 5) {
                /* disp32 (or RIP-relative in 64-bit mode) */
                s->pc += 4;
            }
            break;
        case 1:
            /* disp8 */
            s->pc++;
            break;
        default:
        case 2:
            /* disp32 */
            s->pc += 4;
            break;
        }
    } else {
        /* 16-bit addressing */
        switch (mod) {
        case 0:
            if (rm == 6) {
                /* disp16 direct address */
                s->pc += 2;
            }
            break;
        case 1:
            /* disp8 */
            s->pc++;
            break;
        default:
        case 2:
            /* disp16 */
            s->pc += 2;
            break;
        }
    }
}
2116

    
2117
/* used for LEA and MOV AX, mem */
2118
static void gen_add_A0_ds_seg(DisasContext *s)
2119
{
2120
    int override, must_add_seg;
2121
    must_add_seg = s->addseg;
2122
    override = R_DS;
2123
    if (s->override >= 0) {
2124
        override = s->override;
2125
        must_add_seg = 1;
2126
    } else {
2127
        override = R_DS;
2128
    }
2129
    if (must_add_seg) {
2130
#ifdef TARGET_X86_64
2131
        if (CODE64(s)) {
2132
            gen_op_addq_A0_seg(override);
2133
        } else
2134
#endif
2135
        {
2136
            gen_op_addl_A0_seg(override);
2137
        }
2138
    }
2139
}
2140

    
2141
/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
   OR_TMP0 */
static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
{
    int mod, rm, opreg, disp;

    mod = (modrm >> 6) & 3;
    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        /* register operand: plain register-to-register move */
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    } else {
        /* memory operand: compute the address into A0, then access it */
        gen_lea_modrm(s, modrm, &opreg, &disp);
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_st_T0_A0(ot + s->mem_index);
        } else {
            gen_op_ld_T0_A0(ot + s->mem_index);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    }
}
2172

    
2173
/* Fetch an immediate operand of size 'ot' from the instruction stream
   and advance s->pc past it.  Only byte/word/long immediates are
   handled here (the default case treats anything else as OT_LONG). */
static inline uint32_t insn_get(DisasContext *s, int ot)
{
    uint32_t ret;

    switch(ot) {
    case OT_BYTE:
        ret = ldub_code(s->pc);
        s->pc++;
        break;
    case OT_WORD:
        ret = lduw_code(s->pc);
        s->pc += 2;
        break;
    default:
    case OT_LONG:
        ret = ldl_code(s->pc);
        s->pc += 4;
        break;
    }
    return ret;
}
2194

    
2195
static inline int insn_const_size(unsigned int ot)
2196
{
2197
    if (ot <= OT_LONG)
2198
        return 1 << ot;
2199
    else
2200
        return 4;
2201
}
2202

    
2203
/* Emit a jump to 'eip': a direct chained TB jump when the target lies
   on a page already covered by this TB, otherwise a full end-of-block
   exit so the target is looked up at run time. */
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    TranslationBlock *tb;
    target_ulong pc;

    pc = s->cs_base + eip;
    tb = s->tb;
    /* NOTE: we handle the case where the TB spans two pages here */
    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(eip);
        tcg_gen_exit_tb((long)tb + tb_num);
    } else {
        /* jump to another page: currently not optimized */
        gen_jmp_im(eip);
        gen_eob(s);
    }
}
2223

    
2224
/* Emit a conditional jump: taken -> 'val', not taken -> 'next_eip'.
   When TB chaining is allowed (jmp_opt) both edges use goto_tb;
   otherwise a plain in-TB branch followed by an end-of-block. */
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    int l1, l2, cc_op;

    /* remember the current cc state for gen_jcc1; after flushing,
       the translator-visible state becomes dynamic */
    cc_op = s->cc_op;
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        /* fall-through edge */
        gen_goto_tb(s, 0, next_eip);

        /* taken edge */
        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
        s->is_jmp = 3;
    } else {

        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_jmp_im(next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(val);
        gen_set_label(l2);
        gen_eob(s);
    }
}
2258

    
2259
/* Emit code leaving 0/1 in T0 according to condition 'b' (SETcc). */
static void gen_setcc(DisasContext *s, int b)
{
    int inv, jcc_op, l1;
    TCGv t0;

    if (is_fast_jcc_case(s, b)) {
        /* nominal case: we use a jump */
        /* XXX: make it faster by adding new instructions in TCG */
        t0 = tcg_temp_local_new();
        tcg_gen_movi_tl(t0, 0);
        l1 = gen_new_label();
        /* test the inverted condition so the default value stays 0 */
        gen_jcc1(s, s->cc_op, b ^ 1, l1);
        tcg_gen_movi_tl(t0, 1);
        gen_set_label(l1);
        tcg_gen_mov_tl(cpu_T[0], t0);
        tcg_temp_free(t0);
    } else {
        /* slow case: it is more efficient not to generate a jump,
           although it is questionable whether this optimization is
           worth it */
        inv = b & 1;
        jcc_op = (b >> 1) & 7;
        gen_setcc_slow_T0(s, jcc_op);
        if (inv) {
            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
        }
    }
}
2287

    
2288
/* Load the selector of segment register 'seg_reg' into T0. */
static inline void gen_op_movl_T0_seg(int seg_reg)
{
    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                     offsetof(CPUX86State,segs[seg_reg].selector));
}
2293

    
2294
/* Real-mode/VM86 segment load: store the 16-bit selector from T0 and
   set the segment base to selector << 4 (no descriptor lookup). */
static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
    tcg_gen_st_tl(cpu_T[0], cpu_env, 
                  offsetof(CPUX86State,segs[seg_reg].base));
}
2303

    
2304
/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
{
    if (s->pe && !s->vm86) {
        /* protected mode: go through the helper so descriptor checks
           and possible exceptions are handled */
        /* XXX: optimize by finding processor state dynamically */
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
            s->is_jmp = 3;
    } else {
        gen_op_movl_seg_T0_vm(seg_reg);
        if (seg_reg == R_SS)
            s->is_jmp = 3;
    }
}
2327

    
2328
static inline int svm_is_rep(int prefixes)
2329
{
2330
    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2331
}
2332

    
2333
/* Emit an SVM intercept check of the given 'type' with an extra
   'param' (exit information).  No-op unless the guest is running
   under SVM (HF_SVMI_MASK). */
static inline void
gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
                              uint32_t type, uint64_t param)
{
    /* no SVM activated; fast case */
    if (likely(!(s->flags & HF_SVMI_MASK)))
        return;
    /* flush cc state and EIP so a #VMEXIT sees a consistent CPU */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(pc_start - s->cs_base);
    gen_helper_svm_check_intercept_param(tcg_const_i32(type),
                                         tcg_const_i64(param));
}
2346

    
2347
/* Convenience wrapper: SVM intercept check with a zero parameter. */
static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}
2352

    
2353
/* Add 'addend' to ESP/RSP using the stack width implied by the
   current mode (64-bit, ss32, or 16-bit). */
static inline void gen_stack_update(DisasContext *s, int addend)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_add_reg_im(2, R_ESP, addend);
    } else
#endif
    if (s->ss32) {
        gen_op_add_reg_im(1, R_ESP, addend);
    } else {
        gen_op_add_reg_im(0, R_ESP, addend);
    }
}
2366

    
2367
/* generate a push. It depends on ss32, addseg and dflag */
static void gen_push_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        /* 64-bit mode: push 8 bytes, or 2 with a 0x66 prefix */
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T0_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                /* keep the unsegmented ESP value in T1 for writeback */
                tcg_gen_mov_tl(cpu_T[1], cpu_A0);
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            tcg_gen_mov_tl(cpu_T[1], cpu_A0);
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
        /* write the decremented stack pointer back to ESP/SP */
        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
    }
}
2406

    
2407
/* generate a push. It depends on ss32, addseg and dflag */
2408
/* slower version for T1, only used for call Ev */
2409
static void gen_push_T1(DisasContext *s)
2410
{
2411
#ifdef TARGET_X86_64
2412
    if (CODE64(s)) {
2413
        gen_op_movq_A0_reg(R_ESP);
2414
        if (s->dflag) {
2415
            gen_op_addq_A0_im(-8);
2416
            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
2417
        } else {
2418
            gen_op_addq_A0_im(-2);
2419
            gen_op_st_T0_A0(OT_WORD + s->mem_index);
2420
        }
2421
        gen_op_mov_reg_A0(2, R_ESP);
2422
    } else
2423
#endif
2424
    {
2425
        gen_op_movl_A0_reg(R_ESP);
2426
        if (!s->dflag)
2427
            gen_op_addl_A0_im(-2);
2428
        else
2429
            gen_op_addl_A0_im(-4);
2430
        if (s->ss32) {
2431
            if (s->addseg) {
2432
                gen_op_addl_A0_seg(R_SS);
2433
            }
2434
        } else {
2435
            gen_op_andl_A0_ffff();
2436
            gen_op_addl_A0_seg(R_SS);
2437
        }
2438
        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
2439

    
2440
        if (s->ss32 && !s->addseg)
2441
            gen_op_mov_reg_A0(1, R_ESP);
2442
        else
2443
            gen_stack_update(s, (-2) << s->dflag);
2444
    }
2445
}
2446

    
2447
/* two step pop is necessary for precise exceptions */
/* Step 1: load the value at the top of the stack into T0 without
   modifying ESP (ESP is adjusted later by gen_pop_update). */
static void gen_pop_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (s->ss32) {
            if (s->addseg)
                gen_op_addl_A0_seg(R_SS);
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
    }
}
2468

    
2469
/* Step 2 of a pop: advance ESP/RSP past the popped value. */
static void gen_pop_update(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s) && s->dflag) {
        gen_stack_update(s, 8);
    } else
#endif
    {
        gen_stack_update(s, 2 << s->dflag);
    }
}
2480

    
2481
/* Compute the current stack address into A0 (with SS base if addseg),
   keeping the raw offset in T1 for a later ESP writeback. */
static void gen_stack_A0(DisasContext *s)
{
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
}
2490

    
2491
/* NOTE: wrap around in 16 bit not fully handled */
/* PUSHA/PUSHAD: push the eight general registers EAX..EDI. */
static void gen_pusha(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    gen_op_addl_A0_im(-16 <<  s->dflag);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    /* T1 keeps the final stack pointer for the ESP writeback below */
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0;i < 8; i++) {
        /* registers are stored from EDI (7) up to EAX (0) */
        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
        gen_op_addl_A0_im(2 <<  s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}
2509

    
2510
/* NOTE: wrap around in 16 bit not fully handled */
/* POPA/POPAD: pop the eight general registers (ESP value discarded). */
static void gen_popa(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    /* T1 = final stack pointer after popping 8 entries */
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 <<  s->dflag);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0;i < 8; i++) {
        /* ESP is not reloaded */
        if (i != 3) {
            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
        }
        gen_op_addl_A0_im(2 <<  s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}
2531

    
2532
/* ENTER: allocate a stack frame of 'esp_addend' bytes with 'level'
   nesting levels (level > 0 handled by a helper). */
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    int ot, opsize;

    level &= 0x1f;              /* architectural: level is taken mod 32 */
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        ot = s->dflag ? OT_QUAD : OT_WORD;
        opsize = 1 << ot;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addq_A0_im(-opsize);
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);

        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            gen_helper_enter64_level(tcg_const_i32(level),
                                     tcg_const_i32((ot == OT_QUAD)),
                                     cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
    } else
#endif
    {
        ot = s->dflag + OT_WORD;
        opsize = 2 << s->dflag;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addl_A0_im(-opsize);
        if (!s->ss32)
            gen_op_andl_A0_ffff();
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
        if (s->addseg)
            gen_op_addl_A0_seg(R_SS);
        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            gen_helper_enter_level(tcg_const_i32(level),
                                   tcg_const_i32(s->dflag),
                                   cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        /* ESP = new frame base - esp_addend - space for saved levels */
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
    }
}
2585

    
2586
/* Raise exception 'trapno' at guest EIP 'cur_eip' and end the TB. */
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    gen_helper_raise_exception(tcg_const_i32(trapno));
    s->is_jmp = 3;
}
2594

    
2595
/* an interrupt is different from an exception because of the
   privilege checks */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    /* the helper also needs the instruction length (next - cur) */
    gen_helper_raise_interrupt(tcg_const_i32(intno), 
                               tcg_const_i32(next_eip - cur_eip));
    s->is_jmp = 3;
}
2607

    
2608
/* Raise a debug exception at 'cur_eip' and end the TB. */
static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    gen_helper_debug();
    s->is_jmp = 3;
}
2616

    
2617
/* generate a generic end of block. Trace exception is also generated
   if needed */
static void gen_eob(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
        /* the interrupt-shadow (e.g. after MOV SS) lasts one insn only */
        gen_helper_reset_inhibit_irq();
    }
    if (s->singlestep_enabled) {
        gen_helper_debug();
    } else if (s->tf) {
        /* TF set: raise the single-step trap instead of chaining */
        gen_helper_single_step();
    } else {
        tcg_gen_exit_tb(0);
    }
    s->is_jmp = 3;
}
2635

    
2636
/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    if (s->jmp_opt) {
        if (s->cc_op != CC_OP_DYNAMIC) {
            gen_op_set_cc_op(s->cc_op);
            s->cc_op = CC_OP_DYNAMIC;
        }
        gen_goto_tb(s, tb_num, eip);
        s->is_jmp = 3;
    } else {
        gen_jmp_im(eip);
        gen_eob(s);
    }
}
2652

    
2653
/* Unconditional jump to 'eip' using TB slot 0. */
static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}
2657

    
2658
/* Load a 64-bit value from guest memory at A0 into the CPUState field
   at 'offset' (used for MMX/SSE low-quadword loads). */
static inline void gen_ldq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
}
2664

    
2665
/* Store the 64-bit CPUState field at 'offset' to guest memory at A0. */
static inline void gen_stq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
}
2671

    
2672
/* Load a 128-bit XMM value (two 64-bit halves) from guest memory at A0
   into the XMMReg at 'offset'. */
static inline void gen_ldo_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
}
2681

    
2682
/* Store the 128-bit XMMReg at 'offset' (two 64-bit halves) to guest
   memory at A0. */
static inline void gen_sto_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
}
2691

    
2692
/* Copy a 128-bit (octword) value between two CPUState offsets. */
static inline void gen_op_movo(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
}
2699

    
2700
/* Copy a 64-bit value between two CPUState offsets. */
static inline void gen_op_movq(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}
2705

    
2706
/* Copy a 32-bit value between two CPUState offsets. */
static inline void gen_op_movl(int d_offset, int s_offset)
{
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
}
2711

    
2712
/* Zero the 64-bit CPUState field at 'd_offset'. */
static inline void gen_op_movq_env_0(int d_offset)
{
    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}
2717

    
2718
/* Sentinel values for entries that need special-case decoding or that
   only consume the opcode (no generated op). */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }

/* Helper dispatch for two-byte 0F xx SIMD opcodes.  First index is the
   second opcode byte; second index selects the mandatory prefix in the
   order: none, 0x66, 0xF3, 0xF2. */
static void *sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },

    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0x70] = { gen_helper_pshufw_mmx,
               gen_helper_pshufd_xmm,
               gen_helper_pshufhw_xmm,
               gen_helper_pshuflw_xmm },
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = MMX_OP2(maskmov),
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};
2853

    
2854
/* Immediate-form shift group (0F 71/72/73): indexed by
   8 * size-group (w/d/q) + modrm reg field; per-entry [mmx, xmm]. */
static void *sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm },   /* xmm only */
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm },   /* xmm only */
};
2866

    
2867
/* Scalar int<->float conversions, four entries per group:
   {ss, sd, 64-bit ss, 64-bit sd}; groups: cvtsi2*, cvtt*2si, cvt*2si.
   The 64-bit variants only exist on TARGET_X86_64 builds. */
static void *sse_op_table3[4 * 3] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd,
    X86_64_ONLY(gen_helper_cvtsq2ss),
    X86_64_ONLY(gen_helper_cvtsq2sd),

    gen_helper_cvttss2si,
    gen_helper_cvttsd2si,
    X86_64_ONLY(gen_helper_cvttss2sq),
    X86_64_ONLY(gen_helper_cvttsd2sq),

    gen_helper_cvtss2si,
    gen_helper_cvtsd2si,
    X86_64_ONLY(gen_helper_cvtss2sq),
    X86_64_ONLY(gen_helper_cvtsd2sq),
};
2883

    
2884
/* CMPPS/CMPPD/CMPSS/CMPSD predicates, indexed by the imm8 comparison
   code 0..7; each row is {ps, pd, ss, sd}. */
static void *sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};
2894

    
2895
/* 3DNow! operation helpers, indexed by the instruction's trailing
   opcode/suffix byte.  Unlisted bytes remain NULL (illegal). */
static void *sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq,       /* pfrcpit1; no need to actually
                                       increase precision */
    [0xa7] = gen_helper_movq,       /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq,       /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx   /* pavgusb */
};
2921

    
2922
/* Descriptor for one three-byte (0F 38 / 0F 3A) opcode: the MMX and
   SSE helper entry points plus the CPUID extended-feature bit that
   must be available for the instruction to be legal. */
struct sse_op_helper_s {
    void *op[2];        /* op[0] = MMX helper, op[1] = SSE (xmm) helper */
    uint32_t ext_mask;  /* required CPUID_EXT_* feature flag */
};
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2929
static struct sse_op_helper_s sse_op_table6[256] = {
2930
    [0x00] = SSSE3_OP(pshufb),
2931
    [0x01] = SSSE3_OP(phaddw),
2932
    [0x02] = SSSE3_OP(phaddd),
2933
    [0x03] = SSSE3_OP(phaddsw),
2934
    [0x04] = SSSE3_OP(pmaddubsw),
2935
    [0x05] = SSSE3_OP(phsubw),
2936
    [0x06] = SSSE3_OP(phsubd),
2937
    [0x07] = SSSE3_OP(phsubsw),
2938
    [0x08] = SSSE3_OP(psignb),
2939
    [0x09] = SSSE3_OP(psignw),
2940
    [0x0a] = SSSE3_OP(psignd),
2941
    [0x0b] = SSSE3_OP(pmulhrsw),
2942
    [0x10] = SSE41_OP(pblendvb),
2943
    [0x14] = SSE41_OP(blendvps),
2944
    [0x15] = SSE41_OP(blendvpd),
2945
    [0x17] = SSE41_OP(ptest),
2946
    [0x1c] = SSSE3_OP(pabsb),
2947
    [0x1d] = SSSE3_OP(pabsw),
2948
    [0x1e] = SSSE3_OP(pabsd),
2949
    [0x20] = SSE41_OP(pmovsxbw),
2950
    [0x21] = SSE41_OP(pmovsxbd),
2951
    [0x22] = SSE41_OP(pmovsxbq),
2952
    [0x23] = SSE41_OP(pmovsxwd),
2953
    [0x24] = SSE41_OP(pmovsxwq),
2954
    [0x25] = SSE41_OP(pmovsxdq),
2955
    [0x28] = SSE41_OP(pmuldq),
2956
    [0x29] = SSE41_OP(pcmpeqq),
2957
    [0x2a] = SSE41_SPECIAL, /* movntqda */
2958
    [0x2b] = SSE41_OP(packusdw),
2959
    [0x30] = SSE41_OP(pmovzxbw),
2960
    [0x31] = SSE41_OP(pmovzxbd),
2961
    [0x32] = SSE41_OP(pmovzxbq),
2962
    [0x33] = SSE41_OP(pmovzxwd),
2963
    [0x34] = SSE41_OP(pmovzxwq),
2964
    [0x35] = SSE41_OP(pmovzxdq),
2965
    [0x37] = SSE42_OP(pcmpgtq),
2966
    [0x38] = SSE41_OP(pminsb),
2967
    [0x39] = SSE41_OP(pminsd),
2968
    [0x3a] = SSE41_OP(pminuw),
2969
    [0x3b] = SSE41_OP(pminud),
2970
    [0x3c] = SSE41_OP(pmaxsb),
2971
    [0x3d] = SSE41_OP(pmaxsd),
2972
    [0x3e] = SSE41_OP(pmaxuw),
2973
    [0x3f] = SSE41_OP(pmaxud),
2974
    [0x40] = SSE41_OP(pmulld),
2975
    [0x41] = SSE41_OP(phminposuw),
2976
};
2977

    
2978
/* 0F 3A xx opcodes (SSSE3 / SSE4.1 / SSE4.2 with an immediate byte),
   indexed by the third opcode byte.  SSE_SPECIAL entries take a
   dedicated code path in gen_sse instead of a generic helper call.
   Unlisted bytes remain zeroed and are rejected as illegal. */
static struct sse_op_helper_s sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
};
3002

    
3003
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3004
{
3005
    int b1, op1_offset, op2_offset, is_xmm, val, ot;
3006
    int modrm, mod, rm, reg, reg_addr, offset_addr;
3007
    void *sse_op2;
3008

    
3009
    b &= 0xff;
3010
    if (s->prefix & PREFIX_DATA)
3011
        b1 = 1;
3012
    else if (s->prefix & PREFIX_REPZ)
3013
        b1 = 2;
3014
    else if (s->prefix & PREFIX_REPNZ)
3015
        b1 = 3;
3016
    else
3017
        b1 = 0;
3018
    sse_op2 = sse_op_table1[b][b1];
3019
    if (!sse_op2)
3020
        goto illegal_op;
3021
    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3022
        is_xmm = 1;
3023
    } else {
3024
        if (b1 == 0) {
3025
            /* MMX case */
3026
            is_xmm = 0;
3027
        } else {
3028
            is_xmm = 1;
3029
        }
3030
    }
3031
    /* simple MMX/SSE operation */
3032
    if (s->flags & HF_TS_MASK) {
3033
        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3034
        return;
3035
    }
3036
    if (s->flags & HF_EM_MASK) {
3037
    illegal_op:
3038
        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
3039
        return;
3040
    }
3041
    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
3042
        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
3043
            goto illegal_op;
3044
    if (b == 0x0e) {
3045
        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3046
            goto illegal_op;
3047
        /* femms */
3048
        gen_helper_emms();
3049
        return;
3050
    }
3051
    if (b == 0x77) {
3052
        /* emms */
3053
        gen_helper_emms();
3054
        return;
3055
    }
3056
    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3057
       the static cpu state) */
3058
    if (!is_xmm) {
3059
        gen_helper_enter_mmx();
3060
    }
3061

    
3062
    modrm = ldub_code(s->pc++);
3063
    reg = ((modrm >> 3) & 7);
3064
    if (is_xmm)
3065
        reg |= rex_r;
3066
    mod = (modrm >> 6) & 3;
3067
    if (sse_op2 == SSE_SPECIAL) {
3068
        b |= (b1 << 8);
3069
        switch(b) {
3070
        case 0x0e7: /* movntq */
3071
            if (mod == 3)
3072
                goto illegal_op;
3073
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3074
            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3075
            break;
3076
        case 0x1e7: /* movntdq */
3077
        case 0x02b: /* movntps */
3078
        case 0x12b: /* movntps */
3079
        case 0x3f0: /* lddqu */
3080
            if (mod == 3)
3081
                goto illegal_op;
3082
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3083
            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3084
            break;
3085
        case 0x6e: /* movd mm, ea */
3086
#ifdef TARGET_X86_64
3087
            if (s->dflag == 2) {
3088
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3089
                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3090
            } else
3091
#endif
3092
            {
3093
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3094
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3095
                                 offsetof(CPUX86State,fpregs[reg].mmx));
3096
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3097
                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3098
            }
3099
            break;
3100
        case 0x16e: /* movd xmm, ea */
3101
#ifdef TARGET_X86_64
3102
            if (s->dflag == 2) {
3103
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3104
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3105
                                 offsetof(CPUX86State,xmm_regs[reg]));
3106
                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
3107
            } else
3108
#endif
3109
            {
3110
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3111
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3112
                                 offsetof(CPUX86State,xmm_regs[reg]));
3113
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3114
                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3115
            }
3116
            break;
3117
        case 0x6f: /* movq mm, ea */
3118
            if (mod != 3) {
3119
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3120
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3121
            } else {
3122
                rm = (modrm & 7);
3123
                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3124
                               offsetof(CPUX86State,fpregs[rm].mmx));
3125
                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3126
                               offsetof(CPUX86State,fpregs[reg].mmx));
3127
            }
3128
            break;
3129
        case 0x010: /* movups */
3130
        case 0x110: /* movupd */
3131
        case 0x028: /* movaps */
3132
        case 0x128: /* movapd */
3133
        case 0x16f: /* movdqa xmm, ea */
3134
        case 0x26f: /* movdqu xmm, ea */
3135
            if (mod != 3) {
3136
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3137
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3138
            } else {
3139
                rm = (modrm & 7) | REX_B(s);
3140
                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3141
                            offsetof(CPUX86State,xmm_regs[rm]));
3142
            }
3143
            break;
3144
        case 0x210: /* movss xmm, ea */
3145
            if (mod != 3) {
3146
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3147
                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
3148
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3149
                gen_op_movl_T0_0();
3150
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3151
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3152
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3153
            } else {
3154
                rm = (modrm & 7) | REX_B(s);
3155
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3156
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3157
            }
3158
            break;
3159
        case 0x310: /* movsd xmm, ea */
3160
            if (mod != 3) {
3161
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3162
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3163
                gen_op_movl_T0_0();
3164
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3165
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3166
            } else {
3167
                rm = (modrm & 7) | REX_B(s);
3168
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3169
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3170
            }
3171
            break;
3172
        case 0x012: /* movlps */
3173
        case 0x112: /* movlpd */
3174
            if (mod != 3) {
3175
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3176
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3177
            } else {
3178
                /* movhlps */
3179
                rm = (modrm & 7) | REX_B(s);
3180
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3181
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3182
            }
3183
            break;
3184
        case 0x212: /* movsldup */
3185
            if (mod != 3) {
3186
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3187
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3188
            } else {
3189
                rm = (modrm & 7) | REX_B(s);
3190
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3191
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3192
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3193
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
3194
            }
3195
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3196
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3197
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3198
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3199
            break;
3200
        case 0x312: /* movddup */
3201
            if (mod != 3) {
3202
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3203
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3204
            } else {
3205
                rm = (modrm & 7) | REX_B(s);
3206
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3207
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3208
            }
3209
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3210
                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3211
            break;
3212
        case 0x016: /* movhps */
3213
        case 0x116: /* movhpd */
3214
            if (mod != 3) {
3215
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3216
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3217
            } else {
3218
                /* movlhps */
3219
                rm = (modrm & 7) | REX_B(s);
3220
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3221
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3222
            }
3223
            break;
3224
        case 0x216: /* movshdup */
3225
            if (mod != 3) {
3226
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3227
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3228
            } else {
3229
                rm = (modrm & 7) | REX_B(s);
3230
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3231
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
3232
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3233
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
3234
            }
3235
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3236
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3237
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3238
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3239
            break;
3240
        case 0x7e: /* movd ea, mm */
3241
#ifdef TARGET_X86_64
3242
            if (s->dflag == 2) {
3243
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3244
                               offsetof(CPUX86State,fpregs[reg].mmx));
3245
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
3246
            } else
3247
#endif
3248
            {
3249
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3250
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3251
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
3252
            }
3253
            break;
3254
        case 0x17e: /* movd ea, xmm */
3255
#ifdef TARGET_X86_64
3256
            if (s->dflag == 2) {
3257
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3258
                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3259
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
3260
            } else
3261
#endif
3262
            {
3263
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3264
                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3265
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
3266
            }
3267
            break;
3268
        case 0x27e: /* movq xmm, ea */
3269
            if (mod != 3) {
3270
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3271
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3272
            } else {
3273
                rm = (modrm & 7) | REX_B(s);
3274
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3275
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3276
            }
3277
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3278
            break;
3279
        case 0x7f: /* movq ea, mm */
3280
            if (mod != 3) {
3281
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3282
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3283
            } else {
3284
                rm = (modrm & 7);
3285
                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3286
                            offsetof(CPUX86State,fpregs[reg].mmx));
3287
            }
3288
            break;
3289
        case 0x011: /* movups */
3290
        case 0x111: /* movupd */
3291
        case 0x029: /* movaps */
3292
        case 0x129: /* movapd */
3293
        case 0x17f: /* movdqa ea, xmm */
3294
        case 0x27f: /* movdqu ea, xmm */
3295
            if (mod != 3) {
3296
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3297
                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3298
            } else {
3299
                rm = (modrm & 7) | REX_B(s);
3300
                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3301
                            offsetof(CPUX86State,xmm_regs[reg]));
3302
            }
3303
            break;
3304
        case 0x211: /* movss ea, xmm */
3305
            if (mod != 3) {
3306
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3307
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3308
                gen_op_st_T0_A0(OT_LONG + s->mem_index);
3309
            } else {
3310
                rm = (modrm & 7) | REX_B(s);
3311
                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
3312
                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3313
            }
3314
            break;
3315
        case 0x311: /* movsd ea, xmm */
3316
            if (mod != 3) {
3317
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3318
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3319
            } else {
3320
                rm = (modrm & 7) | REX_B(s);
3321
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3322
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3323
            }
3324
            break;
3325
        case 0x013: /* movlps */
3326
        case 0x113: /* movlpd */
3327
            if (mod != 3) {
3328
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3329
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3330
            } else {
3331
                goto illegal_op;
3332
            }
3333
            break;
3334
        case 0x017: /* movhps */
3335
        case 0x117: /* movhpd */
3336
            if (mod != 3) {
3337
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3338
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3339
            } else {
3340
                goto illegal_op;
3341
            }
3342
            break;
3343
        case 0x71: /* shift mm, im */
3344
        case 0x72:
3345
        case 0x73:
3346
        case 0x171: /* shift xmm, im */
3347
        case 0x172:
3348
        case 0x173:
3349
            val = ldub_code(s->pc++);
3350
            if (is_xmm) {
3351
                gen_op_movl_T0_im(val);
3352
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3353
                gen_op_movl_T0_0();
3354
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
3355
                op1_offset = offsetof(CPUX86State,xmm_t0);
3356
            } else {
3357
                gen_op_movl_T0_im(val);
3358
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3359
                gen_op_movl_T0_0();
3360
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3361
                op1_offset = offsetof(CPUX86State,mmx_t0);
3362
            }
3363
            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
3364
            if (!sse_op2)
3365
                goto illegal_op;
3366
            if (is_xmm) {
3367
                rm = (modrm & 7) | REX_B(s);
3368
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3369
            } else {
3370
                rm = (modrm & 7);
3371
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3372
            }
3373
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3374
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3375
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
3376
            break;
3377
        case 0x050: /* movmskps */
3378
            rm = (modrm & 7) | REX_B(s);
3379
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3380
                             offsetof(CPUX86State,xmm_regs[rm]));
3381
            gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
3382
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3383
            gen_op_mov_reg_T0(OT_LONG, reg);
3384
            break;
3385
        case 0x150: /* movmskpd */
3386
            rm = (modrm & 7) | REX_B(s);
3387
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3388
                             offsetof(CPUX86State,xmm_regs[rm]));
3389
            gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
3390
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3391
            gen_op_mov_reg_T0(OT_LONG, reg);
3392
            break;
3393
        case 0x02a: /* cvtpi2ps */
3394
        case 0x12a: /* cvtpi2pd */
3395
            gen_helper_enter_mmx();
3396
            if (mod != 3) {
3397
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3398
                op2_offset = offsetof(CPUX86State,mmx_t0);
3399
                gen_ldq_env_A0(s->mem_index, op2_offset);
3400
            } else {
3401
                rm = (modrm & 7);
3402
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3403
            }
3404
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3405
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3406
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3407
            switch(b >> 8) {
3408
            case 0x0:
3409
                gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
3410
                break;
3411
            default:
3412
            case 0x1:
3413
                gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
3414
                break;
3415
            }
3416
            break;
3417
        case 0x22a: /* cvtsi2ss */
3418
        case 0x32a: /* cvtsi2sd */
3419
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3420
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3421
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3422
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3423
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
3424
            if (ot == OT_LONG) {
3425
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3426
                ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
3427
            } else {
3428
                ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
3429
            }
3430
            break;
3431
        case 0x02c: /* cvttps2pi */
3432
        case 0x12c: /* cvttpd2pi */
3433
        case 0x02d: /* cvtps2pi */
3434
        case 0x12d: /* cvtpd2pi */
3435
            gen_helper_enter_mmx();
3436
            if (mod != 3) {
3437
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3438
                op2_offset = offsetof(CPUX86State,xmm_t0);
3439
                gen_ldo_env_A0(s->mem_index, op2_offset);
3440
            } else {
3441
                rm = (modrm & 7) | REX_B(s);
3442
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3443
            }
3444
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3445
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3446
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3447
            switch(b) {
3448
            case 0x02c:
3449
                gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
3450
                break;
3451
            case 0x12c:
3452
                gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
3453
                break;
3454
            case 0x02d:
3455
                gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
3456
                break;
3457
            case 0x12d:
3458
                gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
3459
                break;
3460
            }
3461
            break;
3462
        case 0x22c: /* cvttss2si */
3463
        case 0x32c: /* cvttsd2si */
3464
        case 0x22d: /* cvtss2si */
3465
        case 0x32d: /* cvtsd2si */
3466
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3467
            if (mod != 3) {
3468
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3469
                if ((b >> 8) & 1) {
3470
                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
3471
                } else {
3472
                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
3473
                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3474
                }
3475
                op2_offset = offsetof(CPUX86State,xmm_t0);
3476
            } else {
3477
                rm = (modrm & 7) | REX_B(s);
3478
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3479
            }
3480
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
3481
                                    (b & 1) * 4];
3482
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3483
            if (ot == OT_LONG) {
3484
                ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
3485
                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3486
            } else {
3487
                ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
3488
            }
3489
            gen_op_mov_reg_T0(ot, reg);
3490
            break;
3491
        case 0xc4: /* pinsrw */
3492
        case 0x1c4:
3493
            s->rip_offset = 1;
3494
            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
3495
            val = ldub_code(s->pc++);
3496
            if (b1) {
3497
                val &= 7;
3498
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3499
                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3500
            } else {
3501
                val &= 3;
3502
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3503
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3504
            }
3505
            break;
3506
        case 0xc5: /* pextrw */
3507
        case 0x1c5:
3508
            if (mod != 3)
3509
                goto illegal_op;
3510
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3511
            val = ldub_code(s->pc++);
3512
            if (b1) {
3513
                val &= 7;
3514
                rm = (modrm & 7) | REX_B(s);
3515
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3516
                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3517
            } else {
3518
                val &= 3;
3519
                rm = (modrm & 7);
3520
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3521
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3522
            }
3523
            reg = ((modrm >> 3) & 7) | rex_r;
3524
            gen_op_mov_reg_T0(ot, reg);
3525
            break;
3526
        case 0x1d6: /* movq ea, xmm */
3527
            if (mod != 3) {
3528
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3529
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3530
            } else {
3531
                rm = (modrm & 7) | REX_B(s);
3532
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3533
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3534
                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3535
            }
3536
            break;
3537
        case 0x2d6: /* movq2dq */
3538
            gen_helper_enter_mmx();
3539
            rm = (modrm & 7);
3540
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3541
                        offsetof(CPUX86State,fpregs[rm].mmx));
3542
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3543
            break;
3544
        case 0x3d6: /* movdq2q */
3545
            gen_helper_enter_mmx();
3546
            rm = (modrm & 7) | REX_B(s);
3547
            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3548
                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3549
            break;
3550
        case 0xd7: /* pmovmskb */
3551
        case 0x1d7:
3552
            if (mod != 3)
3553
                goto illegal_op;
3554
            if (b1) {
3555
                rm = (modrm & 7) | REX_B(s);
3556
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3557
                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
3558
            } else {
3559
                rm = (modrm & 7);
3560
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3561
                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
3562
            }
3563
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3564
            reg = ((modrm >> 3) & 7) | rex_r;
3565
            gen_op_mov_reg_T0(OT_LONG, reg);
3566
            break;
3567
        case 0x138:
3568
            if (s->prefix & PREFIX_REPNZ)
3569
                goto crc32;
3570
        case 0x038:
3571
            b = modrm;
3572
            modrm = ldub_code(s->pc++);
3573
            rm = modrm & 7;
3574
            reg = ((modrm >> 3) & 7) | rex_r;
3575
            mod = (modrm >> 6) & 3;
3576

    
3577
            sse_op2 = sse_op_table6[b].op[b1];
3578
            if (!sse_op2)
3579
                goto illegal_op;
3580
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3581
                goto illegal_op;
3582

    
3583
            if (b1) {
3584
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3585
                if (mod == 3) {
3586
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3587
                } else {
3588
                    op2_offset = offsetof(CPUX86State,xmm_t0);
3589
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3590
                    switch (b) {
3591
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3592
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3593
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3594
                        gen_ldq_env_A0(s->mem_index, op2_offset +
3595
                                        offsetof(XMMReg, XMM_Q(0)));
3596
                        break;
3597
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3598
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3599
                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
3600
                                          (s->mem_index >> 2) - 1);
3601
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
3602
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3603
                                        offsetof(XMMReg, XMM_L(0)));
3604
                        break;
3605
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3606
                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
3607
                                          (s->mem_index >> 2) - 1);
3608
                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3609
                                        offsetof(XMMReg, XMM_W(0)));
3610
                        break;
3611
                    case 0x2a:            /* movntqda */
3612
                        gen_ldo_env_A0(s->mem_index, op1_offset);
3613
                        return;
3614
                    default:
3615
                        gen_ldo_env_A0(s->mem_index, op2_offset);
3616
                    }
3617
                }
3618
            } else {
3619
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3620
                if (mod == 3) {
3621
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3622
                } else {
3623
                    op2_offset = offsetof(CPUX86State,mmx_t0);
3624
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3625
                    gen_ldq_env_A0(s->mem_index, op2_offset);
3626
                }
3627
            }
3628
            if (sse_op2 == SSE_SPECIAL)
3629
                goto illegal_op;
3630

    
3631
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3632
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3633
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
3634

    
3635
            if (b == 0x17)
3636
                s->cc_op = CC_OP_EFLAGS;
3637
            break;
3638
        case 0x338: /* crc32 */
3639
        crc32:
3640
            b = modrm;
3641
            modrm = ldub_code(s->pc++);
3642
            reg = ((modrm >> 3) & 7) | rex_r;
3643

    
3644
            if (b != 0xf0 && b != 0xf1)
3645
                goto illegal_op;
3646
            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
3647
                goto illegal_op;
3648

    
3649
            if (b == 0xf0)
3650
                ot = OT_BYTE;
3651
            else if (b == 0xf1 && s->dflag != 2)
3652
                if (s->prefix & PREFIX_DATA)
3653
                    ot = OT_WORD;
3654
                else
3655
                    ot = OT_LONG;
3656
            else
3657
                ot = OT_QUAD;
3658

    
3659
            gen_op_mov_TN_reg(OT_LONG, 0, reg);
3660
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3661
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
3662
            gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
3663
                             cpu_T[0], tcg_const_i32(8 << ot));
3664

    
3665
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3666
            gen_op_mov_reg_T0(ot, reg);
3667
            break;
3668
        case 0x03a:
3669
        case 0x13a:
3670
            b = modrm;
3671
            modrm = ldub_code(s->pc++);
3672
            rm = modrm & 7;
3673
            reg = ((modrm >> 3) & 7) | rex_r;
3674
            mod = (modrm >> 6) & 3;
3675

    
3676
            sse_op2 = sse_op_table7[b].op[b1];
3677
            if (!sse_op2)
3678
                goto illegal_op;
3679
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
3680
                goto illegal_op;
3681

    
3682
            if (sse_op2 == SSE_SPECIAL) {
3683
                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
3684
                rm = (modrm & 7) | REX_B(s);
3685
                if (mod != 3)
3686
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3687
                reg = ((modrm >> 3) & 7) | rex_r;
3688
                val = ldub_code(s->pc++);
3689
                switch (b) {
3690
                case 0x14: /* pextrb */
3691
                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3692
                                            xmm_regs[reg].XMM_B(val & 15)));
3693
                    if (mod == 3)
3694
                        gen_op_mov_reg_T0(ot, rm);
3695
                    else
3696
                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
3697
                                        (s->mem_index >> 2) - 1);
3698
                    break;
3699
                case 0x15: /* pextrw */
3700
                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3701
                                            xmm_regs[reg].XMM_W(val & 7)));
3702
                    if (mod == 3)
3703
                        gen_op_mov_reg_T0(ot, rm);
3704
                    else
3705
                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
3706
                                        (s->mem_index >> 2) - 1);
3707
                    break;
3708
                case 0x16:
3709
                    if (ot == OT_LONG) { /* pextrd */
3710
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
3711
                                        offsetof(CPUX86State,
3712
                                                xmm_regs[reg].XMM_L(val & 3)));
3713
                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3714
                        if (mod == 3)
3715
                            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
3716
                        else
3717
                            tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
3718
                                            (s->mem_index >> 2) - 1);
3719
                    } else { /* pextrq */
3720
#ifdef TARGET_X86_64
3721
                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3722
                                        offsetof(CPUX86State,
3723
                                                xmm_regs[reg].XMM_Q(val & 1)));
3724
                        if (mod == 3)
3725
                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
3726
                        else
3727
                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
3728
                                            (s->mem_index >> 2) - 1);
3729
#else
3730
                        goto illegal_op;
3731
#endif
3732
                    }
3733
                    break;
3734
                case 0x17: /* extractps */
3735
                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3736
                                            xmm_regs[reg].XMM_L(val & 3)));
3737
                    if (mod == 3)
3738
                        gen_op_mov_reg_T0(ot, rm);
3739
                    else
3740
                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
3741
                                        (s->mem_index >> 2) - 1);
3742
                    break;
3743
                case 0x20: /* pinsrb */
3744
                    if (mod == 3)
3745
                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
3746
                    else
3747
                        tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
3748
                                        (s->mem_index >> 2) - 1);
3749
                    tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
3750
                                            xmm_regs[reg].XMM_B(val & 15)));
3751
                    break;
3752
                case 0x21: /* insertps */
3753
                    if (mod == 3) {
3754
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
3755
                                        offsetof(CPUX86State,xmm_regs[rm]
3756
                                                .XMM_L((val >> 6) & 3)));
3757
                    } else {
3758
                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
3759
                                        (s->mem_index >> 2) - 1);
3760
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
3761
                    }
3762
                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
3763
                                    offsetof(CPUX86State,xmm_regs[reg]
3764
                                            .XMM_L((val >> 4) & 3)));
3765
                    if ((val >> 0) & 1)
3766
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3767
                                        cpu_env, offsetof(CPUX86State,
3768
                                                xmm_regs[reg].XMM_L(0)));
3769
                    if ((val >> 1) & 1)
3770
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3771
                                        cpu_env, offsetof(CPUX86State,
3772
                                                xmm_regs[reg].XMM_L(1)));
3773
                    if ((val >> 2) & 1)
3774
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3775
                                        cpu_env, offsetof(CPUX86State,
3776
                                                xmm_regs[reg].XMM_L(2)));
3777
                    if ((val >> 3) & 1)
3778
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
3779
                                        cpu_env, offsetof(CPUX86State,
3780
                                                xmm_regs[reg].XMM_L(3)));
3781
                    break;
3782
                case 0x22:
3783
                    if (ot == OT_LONG) { /* pinsrd */
3784
                        if (mod == 3)
3785
                            gen_op_mov_v_reg(ot, cpu_tmp0, rm);
3786
                        else
3787
                            tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
3788
                                            (s->mem_index >> 2) - 1);
3789
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
3790
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
3791
                                        offsetof(CPUX86State,
3792
                                                xmm_regs[reg].XMM_L(val & 3)));
3793
                    } else { /* pinsrq */
3794
#ifdef TARGET_X86_64
3795
                        if (mod == 3)
3796
                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
3797
                        else
3798
                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
3799
                                            (s->mem_index >> 2) - 1);
3800
                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3801
                                        offsetof(CPUX86State,
3802
                                                xmm_regs[reg].XMM_Q(val & 1)));
3803
#else
3804
                        goto illegal_op;
3805
#endif
3806
                    }
3807
                    break;
3808
                }
3809
                return;
3810
            }
3811

    
3812
            if (b1) {
3813
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3814
                if (mod == 3) {
3815
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3816
                } else {
3817
                    op2_offset = offsetof(CPUX86State,xmm_t0);
3818
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3819
                    gen_ldo_env_A0(s->mem_index, op2_offset);
3820
                }
3821
            } else {
3822
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3823
                if (mod == 3) {
3824
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3825
                } else {
3826
                    op2_offset = offsetof(CPUX86State,mmx_t0);
3827
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3828
                    gen_ldq_env_A0(s->mem_index, op2_offset);
3829
                }
3830
            }
3831
            val = ldub_code(s->pc++);
3832

    
3833
            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
3834
                s->cc_op = CC_OP_EFLAGS;
3835

    
3836
                if (s->dflag == 2)
3837
                    /* The helper must use entire 64-bit gp registers */
3838
                    val |= 1 << 8;
3839
            }
3840

    
3841
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3842
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3843
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
3844
            break;
3845
        default:
3846
            goto illegal_op;
3847
        }
3848
    } else {
3849
        /* generic MMX or SSE operation */
3850
        switch(b) {
3851
        case 0x70: /* pshufx insn */
3852
        case 0xc6: /* pshufx insn */
3853
        case 0xc2: /* compare insns */
3854
            s->rip_offset = 1;
3855
            break;
3856
        default:
3857
            break;
3858
        }
3859
        if (is_xmm) {
3860
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3861
            if (mod != 3) {
3862
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3863
                op2_offset = offsetof(CPUX86State,xmm_t0);
3864
                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
3865
                                b == 0xc2)) {
3866
                    /* specific case for SSE single instructions */
3867
                    if (b1 == 2) {
3868
                        /* 32 bit access */
3869
                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
3870
                        tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3871
                    } else {
3872
                        /* 64 bit access */
3873
                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
3874
                    }
3875
                } else {
3876
                    gen_ldo_env_A0(s->mem_index, op2_offset);
3877
                }
3878
            } else {
3879
                rm = (modrm & 7) | REX_B(s);
3880
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3881
            }
3882
        } else {
3883
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3884
            if (mod != 3) {
3885
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3886
                op2_offset = offsetof(CPUX86State,mmx_t0);
3887
                gen_ldq_env_A0(s->mem_index, op2_offset);
3888
            } else {
3889
                rm = (modrm & 7);
3890
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3891
            }
3892
        }
3893
        switch(b) {
3894
        case 0x0f: /* 3DNow! data insns */
3895
            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3896
                goto illegal_op;
3897
            val = ldub_code(s->pc++);
3898
            sse_op2 = sse_op_table5[val];
3899
            if (!sse_op2)
3900
                goto illegal_op;
3901
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3902
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3903
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
3904
            break;
3905
        case 0x70: /* pshufx insn */
3906
        case 0xc6: /* pshufx insn */
3907
            val = ldub_code(s->pc++);
3908
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3909
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3910
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
3911
            break;
3912
        case 0xc2:
3913
            /* compare insns */
3914
            val = ldub_code(s->pc++);
3915
            if (val >= 8)
3916
                goto illegal_op;
3917
            sse_op2 = sse_op_table4[val][b1];
3918
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3919
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3920
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
3921
            break;
3922
        case 0xf7:
3923
            /* maskmov : we must prepare A0 */
3924
            if (mod != 3)
3925
                goto illegal_op;
3926
#ifdef TARGET_X86_64
3927
            if (s->aflag == 2) {
3928
                gen_op_movq_A0_reg(R_EDI);
3929
            } else
3930
#endif
3931
            {
3932
                gen_op_movl_A0_reg(R_EDI);
3933
                if (s->aflag == 0)
3934
                    gen_op_andl_A0_ffff();
3935
            }
3936
            gen_add_A0_ds_seg(s);
3937

    
3938
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3939
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3940
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
3941
            break;
3942
        default:
3943
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3944
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3945
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
3946
            break;
3947
        }
3948
        if (b == 0x2e || b == 0x2f) {
3949
            s->cc_op = CC_OP_EFLAGS;
3950
        }
3951
    }
3952
}
3953

    
3954
/* convert one instruction. s->is_jmp is set if the translation must
   be stopped. Return the next pc value */
static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
3957
{
3958
    int b, prefixes, aflag, dflag;
3959
    int shift, ot;
3960
    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
3961
    target_ulong next_eip, tval;
3962
    int rex_w, rex_r;
3963

    
3964
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
3965
        tcg_gen_debug_insn_start(pc_start);
3966
    s->pc = pc_start;
3967
    prefixes = 0;
3968
    aflag = s->code32;
3969
    dflag = s->code32;
3970
    s->override = -1;
3971
    rex_w = -1;
3972
    rex_r = 0;
3973
#ifdef TARGET_X86_64
3974
    s->rex_x = 0;
3975
    s->rex_b = 0;
3976
    x86_64_hregs = 0;
3977
#endif
3978
    s->rip_offset = 0; /* for relative ip address */
3979
 next_byte:
3980
    b = ldub_code(s->pc);
3981
    s->pc++;
3982
    /* check prefixes */
3983
#ifdef TARGET_X86_64
3984
    if (CODE64(s)) {
3985
        switch (b) {
3986
        case 0xf3:
3987
            prefixes |= PREFIX_REPZ;
3988
            goto next_byte;
3989
        case 0xf2:
3990
            prefixes |= PREFIX_REPNZ;
3991
            goto next_byte;
3992
        case 0xf0:
3993
            prefixes |= PREFIX_LOCK;
3994
            goto next_byte;
3995
        case 0x2e:
3996
            s->override = R_CS;
3997
            goto next_byte;
3998
        case 0x36:
3999
            s->override = R_SS;
4000
            goto next_byte;
4001
        case 0x3e:
4002
            s->override = R_DS;
4003
            goto next_byte;
4004
        case 0x26:
4005
            s->override = R_ES;
4006
            goto next_byte;
4007
        case 0x64:
4008
            s->override = R_FS;
4009
            goto next_byte;
4010
        case 0x65:
4011
            s->override = R_GS;
4012
            goto next_byte;
4013
        case 0x66:
4014
            prefixes |= PREFIX_DATA;
4015
            goto next_byte;
4016
        case 0x67:
4017
            prefixes |= PREFIX_ADR;
4018
            goto next_byte;
4019
        case 0x40 ... 0x4f:
4020
            /* REX prefix */
4021
            rex_w = (b >> 3) & 1;
4022
            rex_r = (b & 0x4) << 1;
4023
            s->rex_x = (b & 0x2) << 2;
4024
            REX_B(s) = (b & 0x1) << 3;
4025
            x86_64_hregs = 1; /* select uniform byte register addressing */
4026
            goto next_byte;
4027
        }
4028
        if (rex_w == 1) {
4029
            /* 0x66 is ignored if rex.w is set */
4030
            dflag = 2;
4031
        } else {
4032
            if (prefixes & PREFIX_DATA)
4033
                dflag ^= 1;
4034
        }
4035
        if (!(prefixes & PREFIX_ADR))
4036
            aflag = 2;
4037
    } else
4038
#endif
4039
    {
4040
        switch (b) {
4041
        case 0xf3:
4042
            prefixes |= PREFIX_REPZ;
4043
            goto next_byte;
4044
        case 0xf2:
4045
            prefixes |= PREFIX_REPNZ;
4046
            goto next_byte;
4047
        case 0xf0:
4048
            prefixes |= PREFIX_LOCK;
4049
            goto next_byte;
4050
        case 0x2e:
4051
            s->override = R_CS;
4052
            goto next_byte;
4053
        case 0x36:
4054
            s->override = R_SS;
4055
            goto next_byte;
4056
        case 0x3e:
4057
            s->override = R_DS;
4058
            goto next_byte;
4059
        case 0x26:
4060
            s->override = R_ES;
4061
            goto next_byte;
4062
        case 0x64:
4063
            s->override = R_FS;
4064
            goto next_byte;
4065
        case 0x65:
4066
            s->override = R_GS;
4067
            goto next_byte;
4068
        case 0x66:
4069
            prefixes |= PREFIX_DATA;
4070
            goto next_byte;
4071
        case 0x67:
4072
            prefixes |= PREFIX_ADR;
4073
            goto next_byte;
4074
        }
4075
        if (prefixes & PREFIX_DATA)
4076
            dflag ^= 1;
4077
        if (prefixes & PREFIX_ADR)
4078
            aflag ^= 1;
4079
    }
4080

    
4081
    s->prefix = prefixes;
4082
    s->aflag = aflag;
4083
    s->dflag = dflag;
4084

    
4085
    /* lock generation */
4086
    if (prefixes & PREFIX_LOCK)
4087
        gen_helper_lock();
4088

    
4089
    /* now check op code */
4090
 reswitch:
4091
    switch(b) {
4092
    case 0x0f:
4093
        /**************************/
4094
        /* extended op code */
4095
        b = ldub_code(s->pc++) | 0x100;
4096
        goto reswitch;
4097

    
4098
        /**************************/
4099
        /* arith & logic */
4100
    case 0x00 ... 0x05:
4101
    case 0x08 ... 0x0d:
4102
    case 0x10 ... 0x15:
4103
    case 0x18 ... 0x1d:
4104
    case 0x20 ... 0x25:
4105
    case 0x28 ... 0x2d:
4106
    case 0x30 ... 0x35:
4107
    case 0x38 ... 0x3d:
4108
        {
4109
            int op, f, val;
4110
            op = (b >> 3) & 7;
4111
            f = (b >> 1) & 3;
4112

    
4113
            if ((b & 1) == 0)
4114
                ot = OT_BYTE;
4115
            else
4116
                ot = dflag + OT_WORD;
4117

    
4118
            switch(f) {
4119
            case 0: /* OP Ev, Gv */
4120
                modrm = ldub_code(s->pc++);
4121
                reg = ((modrm >> 3) & 7) | rex_r;
4122
                mod = (modrm >> 6) & 3;
4123
                rm = (modrm & 7) | REX_B(s);
4124
                if (mod != 3) {
4125
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4126
                    opreg = OR_TMP0;
4127
                } else if (op == OP_XORL && rm == reg) {
4128
                xor_zero:
4129
                    /* xor reg, reg optimisation */
4130
                    gen_op_movl_T0_0();
4131
                    s->cc_op = CC_OP_LOGICB + ot;
4132
                    gen_op_mov_reg_T0(ot, reg);
4133
                    gen_op_update1_cc();
4134
                    break;
4135
                } else {
4136
                    opreg = rm;
4137
                }
4138
                gen_op_mov_TN_reg(ot, 1, reg);
4139
                gen_op(s, op, ot, opreg);
4140
                break;
4141
            case 1: /* OP Gv, Ev */
4142
                modrm = ldub_code(s->pc++);
4143
                mod = (modrm >> 6) & 3;
4144
                reg = ((modrm >> 3) & 7) | rex_r;
4145
                rm = (modrm & 7) | REX_B(s);
4146
                if (mod != 3) {
4147
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4148
                    gen_op_ld_T1_A0(ot + s->mem_index);
4149
                } else if (op == OP_XORL && rm == reg) {
4150
                    goto xor_zero;
4151
                } else {
4152
                    gen_op_mov_TN_reg(ot, 1, rm);
4153
                }
4154
                gen_op(s, op, ot, reg);
4155
                break;
4156
            case 2: /* OP A, Iv */
4157
                val = insn_get(s, ot);
4158
                gen_op_movl_T1_im(val);
4159
                gen_op(s, op, ot, OR_EAX);
4160
                break;
4161
            }
4162
        }
4163
        break;
4164

    
4165
    case 0x82:
4166
        if (CODE64(s))
4167
            goto illegal_op;
4168
    case 0x80: /* GRP1 */
4169
    case 0x81:
4170
    case 0x83:
4171
        {
4172
            int val;
4173

    
4174
            if ((b & 1) == 0)
4175
                ot = OT_BYTE;
4176
            else
4177
                ot = dflag + OT_WORD;
4178

    
4179
            modrm = ldub_code(s->pc++);
4180
            mod = (modrm >> 6) & 3;
4181
            rm = (modrm & 7) | REX_B(s);
4182
            op = (modrm >> 3) & 7;
4183

    
4184
            if (mod != 3) {
4185
                if (b == 0x83)
4186
                    s->rip_offset = 1;
4187
                else
4188
                    s->rip_offset = insn_const_size(ot);
4189
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4190
                opreg = OR_TMP0;
4191
            } else {
4192
                opreg = rm;
4193
            }
4194

    
4195
            switch(b) {
4196
            default:
4197
            case 0x80:
4198
            case 0x81:
4199
            case 0x82:
4200
                val = insn_get(s, ot);
4201
                break;
4202
            case 0x83:
4203
                val = (int8_t)insn_get(s, OT_BYTE);
4204
                break;
4205
            }
4206
            gen_op_movl_T1_im(val);
4207
            gen_op(s, op, ot, opreg);
4208
        }
4209
        break;
4210

    
4211
        /**************************/
4212
        /* inc, dec, and other misc arith */
4213
    case 0x40 ... 0x47: /* inc Gv */
4214
        ot = dflag ? OT_LONG : OT_WORD;
4215
        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4216
        break;
4217
    case 0x48 ... 0x4f: /* dec Gv */
4218
        ot = dflag ? OT_LONG : OT_WORD;
4219
        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4220
        break;
4221
    case 0xf6: /* GRP3 */
4222
    case 0xf7:
4223
        if ((b & 1) == 0)
4224
            ot = OT_BYTE;
4225
        else
4226
            ot = dflag + OT_WORD;
4227

    
4228
        modrm = ldub_code(s->pc++);
4229
        mod = (modrm >> 6) & 3;
4230
        rm = (modrm & 7) | REX_B(s);
4231
        op = (modrm >> 3) & 7;
4232
        if (mod != 3) {
4233
            if (op == 0)
4234
                s->rip_offset = insn_const_size(ot);
4235
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4236
            gen_op_ld_T0_A0(ot + s->mem_index);
4237
        } else {
4238
            gen_op_mov_TN_reg(ot, 0, rm);
4239
        }
4240

    
4241
        switch(op) {
4242
        case 0: /* test */
4243
            val = insn_get(s, ot);
4244
            gen_op_movl_T1_im(val);
4245
            gen_op_testl_T0_T1_cc();
4246
            s->cc_op = CC_OP_LOGICB + ot;
4247
            break;
4248
        case 2: /* not */
4249
            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
4250
            if (mod != 3) {
4251
                gen_op_st_T0_A0(ot + s->mem_index);
4252
            } else {
4253
                gen_op_mov_reg_T0(ot, rm);
4254
            }
4255
            break;
4256
        case 3: /* neg */
4257
            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
4258
            if (mod != 3) {
4259
                gen_op_st_T0_A0(ot + s->mem_index);
4260
            } else {
4261
                gen_op_mov_reg_T0(ot, rm);
4262
            }
4263
            gen_op_update_neg_cc();
4264
            s->cc_op = CC_OP_SUBB + ot;
4265
            break;
4266
        case 4: /* mul */
4267
            switch(ot) {
4268
            case OT_BYTE:
4269
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
4270
                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
4271
                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
4272
                /* XXX: use 32 bit mul which could be faster */
4273
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4274
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
4275
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4276
                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
4277
                s->cc_op = CC_OP_MULB;
4278
                break;
4279
            case OT_WORD:
4280
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
4281
                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4282
                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
4283
                /* XXX: use 32 bit mul which could be faster */
4284
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4285
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
4286
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4287
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4288
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
4289
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4290
                s->cc_op = CC_OP_MULW;
4291
                break;
4292
            default:
4293
            case OT_LONG:
4294
#ifdef TARGET_X86_64
4295
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
4296
                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
4297
                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
4298
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4299
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
4300
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4301
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
4302
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
4303
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4304
#else
4305
                {
4306
                    TCGv_i64 t0, t1;
4307
                    t0 = tcg_temp_new_i64();
4308
                    t1 = tcg_temp_new_i64();
4309
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
4310
                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
4311
                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
4312
                    tcg_gen_mul_i64(t0, t0, t1);
4313
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
4314
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
4315
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4316
                    tcg_gen_shri_i64(t0, t0, 32);
4317
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
4318
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
4319
                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4320
                }
4321
#endif
4322
                s->cc_op = CC_OP_MULL;
4323
                break;
4324
#ifdef TARGET_X86_64
4325
            case OT_QUAD:
4326
                gen_helper_mulq_EAX_T0(cpu_T[0]);
4327
                s->cc_op = CC_OP_MULQ;
4328
                break;
4329
#endif
4330
            }
4331
            break;
4332
        case 5: /* imul */
4333
            switch(ot) {
4334
            case OT_BYTE:
4335
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
4336
                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
4337
                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
4338
                /* XXX: use 32 bit mul which could be faster */
4339
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4340
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
4341
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4342
                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
4343
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4344
                s->cc_op = CC_OP_MULB;
4345
                break;
4346
            case OT_WORD:
4347
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
4348
                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4349
                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
4350
                /* XXX: use 32 bit mul which could be faster */
4351
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4352
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
4353
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4354
                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
4355
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4356
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4357
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
4358
                s->cc_op = CC_OP_MULW;
4359
                break;
4360
            default:
4361
            case OT_LONG:
4362
#ifdef TARGET_X86_64
4363
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
4364
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
4365
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
4366
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4367
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
4368
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4369
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
4370
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4371
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
4372
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
4373
#else
4374
                {
4375
                    TCGv_i64 t0, t1;
4376
                    t0 = tcg_temp_new_i64();
4377
                    t1 = tcg_temp_new_i64();
4378
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
4379
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
4380
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
4381
                    tcg_gen_mul_i64(t0, t0, t1);
4382
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
4383
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
4384
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4385
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
4386
                    tcg_gen_shri_i64(t0, t0, 32);
4387
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
4388
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
4389
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4390
                }
4391
#endif
4392
                s->cc_op = CC_OP_MULL;
4393
                break;
4394
#ifdef TARGET_X86_64
4395
            case OT_QUAD:
4396
                gen_helper_imulq_EAX_T0(cpu_T[0]);
4397
                s->cc_op = CC_OP_MULQ;
4398
                break;
4399
#endif
4400
            }
4401
            break;
4402
        case 6: /* div */
4403
            switch(ot) {
4404
            case OT_BYTE:
4405
                gen_jmp_im(pc_start - s->cs_base);
4406
                gen_helper_divb_AL(cpu_T[0]);
4407
                break;
4408
            case OT_WORD:
4409
                gen_jmp_im(pc_start - s->cs_base);
4410
                gen_helper_divw_AX(cpu_T[0]);
4411
                break;
4412
            default:
4413
            case OT_LONG:
4414
                gen_jmp_im(pc_start - s->cs_base);
4415
                gen_helper_divl_EAX(cpu_T[0]);
4416
                break;
4417
#ifdef TARGET_X86_64
4418
            case OT_QUAD:
4419
                gen_jmp_im(pc_start - s->cs_base);
4420
                gen_helper_divq_EAX(cpu_T[0]);
4421
                break;
4422
#endif
4423
            }
4424
            break;
4425
        case 7: /* idiv */
4426
            switch(ot) {
4427
            case OT_BYTE:
4428
                gen_jmp_im(pc_start - s->cs_base);
4429
                gen_helper_idivb_AL(cpu_T[0]);
4430
                break;
4431
            case OT_WORD:
4432
                gen_jmp_im(pc_start - s->cs_base);
4433
                gen_helper_idivw_AX(cpu_T[0]);
4434
                break;
4435
            default:
4436
            case OT_LONG:
4437
                gen_jmp_im(pc_start - s->cs_base);
4438
                gen_helper_idivl_EAX(cpu_T[0]);
4439
                break;
4440
#ifdef TARGET_X86_64
4441
            case OT_QUAD:
4442
                gen_jmp_im(pc_start - s->cs_base);
4443
                gen_helper_idivq_EAX(cpu_T[0]);
4444
                break;
4445
#endif
4446
            }
4447
            break;
4448
        default:
4449
            goto illegal_op;
4450
        }
4451
        break;
4452

    
4453
    case 0xfe: /* GRP4 */
4454
    case 0xff: /* GRP5 */
4455
        if ((b & 1) == 0)
4456
            ot = OT_BYTE;
4457
        else
4458
            ot = dflag + OT_WORD;
4459

    
4460
        modrm = ldub_code(s->pc++);
4461
        mod = (modrm >> 6) & 3;
4462
        rm = (modrm & 7) | REX_B(s);
4463
        op = (modrm >> 3) & 7;
4464
        if (op >= 2 && b == 0xfe) {
4465
            goto illegal_op;
4466
        }
4467
        if (CODE64(s)) {
4468
            if (op == 2 || op == 4) {
4469
                /* operand size for jumps is 64 bit */
4470
                ot = OT_QUAD;
4471
            } else if (op == 3 || op == 5) {
4472
                /* for call calls, the operand is 16 or 32 bit, even
4473
                   in long mode */
4474
                ot = dflag ? OT_LONG : OT_WORD;
4475
            } else if (op == 6) {
4476
                /* default push size is 64 bit */
4477
                ot = dflag ? OT_QUAD : OT_WORD;
4478
            }
4479
        }
4480
        if (mod != 3) {
4481
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4482
            if (op >= 2 && op != 3 && op != 5)
4483
                gen_op_ld_T0_A0(ot + s->mem_index);
4484
        } else {
4485
            gen_op_mov_TN_reg(ot, 0, rm);
4486
        }
4487

    
4488
        switch(op) {
4489
        case 0: /* inc Ev */
4490
            if (mod != 3)
4491
                opreg = OR_TMP0;
4492
            else
4493
                opreg = rm;
4494
            gen_inc(s, ot, opreg, 1);
4495
            break;
4496
        case 1: /* dec Ev */
4497
            if (mod != 3)
4498
                opreg = OR_TMP0;
4499
            else
4500
                opreg = rm;
4501
            gen_inc(s, ot, opreg, -1);
4502
            break;
4503
        case 2: /* call Ev */
4504
            /* XXX: optimize if memory (no 'and' is necessary) */
4505
            if (s->dflag == 0)
4506
                gen_op_andl_T0_ffff();
4507
            next_eip = s->pc - s->cs_base;
4508
            gen_movtl_T1_im(next_eip);
4509
            gen_push_T1(s);
4510
            gen_op_jmp_T0();
4511
            gen_eob(s);
4512
            break;
4513
        case 3: /* lcall Ev */
4514
            gen_op_ld_T1_A0(ot + s->mem_index);
4515
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
4516
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
4517
        do_lcall:
4518
            if (s->pe && !s->vm86) {
4519
                if (s->cc_op != CC_OP_DYNAMIC)
4520
                    gen_op_set_cc_op(s->cc_op);
4521
                gen_jmp_im(pc_start - s->cs_base);
4522
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4523
                gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
4524
                                           tcg_const_i32(dflag), 
4525
                                           tcg_const_i32(s->pc - pc_start));
4526
            } else {
4527
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4528
                gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
4529
                                      tcg_const_i32(dflag), 
4530
                                      tcg_const_i32(s->pc - s->cs_base));
4531
            }
4532
            gen_eob(s);
4533
            break;
4534
        case 4: /* jmp Ev */
4535
            if (s->dflag == 0)
4536
                gen_op_andl_T0_ffff();
4537
            gen_op_jmp_T0();
4538
            gen_eob(s);
4539
            break;
4540
        case 5: /* ljmp Ev */
4541
            gen_op_ld_T1_A0(ot + s->mem_index);
4542
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
4543
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
4544
        do_ljmp:
4545
            if (s->pe && !s->vm86) {
4546
                if (s->cc_op != CC_OP_DYNAMIC)
4547
                    gen_op_set_cc_op(s->cc_op);
4548
                gen_jmp_im(pc_start - s->cs_base);
4549
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4550
                gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
4551
                                          tcg_const_i32(s->pc - pc_start));
4552
            } else {
4553
                gen_op_movl_seg_T0_vm(R_CS);
4554
                gen_op_movl_T0_T1();
4555
                gen_op_jmp_T0();
4556
            }
4557
            gen_eob(s);
4558
            break;
4559
        case 6: /* push Ev */
4560
            gen_push_T0(s);
4561
            break;
4562
        default:
4563
            goto illegal_op;
4564
        }
4565
        break;
4566

    
4567
    case 0x84: /* test Ev, Gv */
4568
    case 0x85:
4569
        if ((b & 1) == 0)
4570
            ot = OT_BYTE;
4571
        else
4572
            ot = dflag + OT_WORD;
4573

    
4574
        modrm = ldub_code(s->pc++);
4575
        mod = (modrm >> 6) & 3;
4576
        rm = (modrm & 7) | REX_B(s);
4577
        reg = ((modrm >> 3) & 7) | rex_r;
4578

    
4579
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
4580
        gen_op_mov_TN_reg(ot, 1, reg);
4581
        gen_op_testl_T0_T1_cc();
4582
        s->cc_op = CC_OP_LOGICB + ot;
4583
        break;
4584

    
4585
    case 0xa8: /* test eAX, Iv */
4586
    case 0xa9:
4587
        if ((b & 1) == 0)
4588
            ot = OT_BYTE;
4589
        else
4590
            ot = dflag + OT_WORD;
4591
        val = insn_get(s, ot);
4592

    
4593
        gen_op_mov_TN_reg(ot, 0, OR_EAX);
4594
        gen_op_movl_T1_im(val);
4595
        gen_op_testl_T0_T1_cc();
4596
        s->cc_op = CC_OP_LOGICB + ot;
4597
        break;
4598

    
4599
    case 0x98: /* CWDE/CBW */
4600
#ifdef TARGET_X86_64
4601
        if (dflag == 2) {
4602
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
4603
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
4604
            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
4605
        } else
4606
#endif
4607
        if (dflag == 1) {
4608
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
4609
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4610
            gen_op_mov_reg_T0(OT_LONG, R_EAX);
4611
        } else {
4612
            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
4613
            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
4614
            gen_op_mov_reg_T0(OT_WORD, R_EAX);
4615
        }
4616
        break;
4617
    case 0x99: /* CDQ/CWD */
4618
#ifdef TARGET_X86_64
4619
        if (dflag == 2) {
4620
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
4621
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
4622
            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
4623
        } else
4624
#endif
4625
        if (dflag == 1) {
4626
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
4627
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
4628
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
4629
            gen_op_mov_reg_T0(OT_LONG, R_EDX);
4630
        } else {
4631
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
4632
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4633
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
4634
            gen_op_mov_reg_T0(OT_WORD, R_EDX);
4635
        }
4636
        break;
4637
    case 0x1af: /* imul Gv, Ev */
4638
    case 0x69: /* imul Gv, Ev, I */
4639
    case 0x6b:
4640
        ot = dflag + OT_WORD;
4641
        modrm = ldub_code(s->pc++);
4642
        reg = ((modrm >> 3) & 7) | rex_r;
4643
        if (b == 0x69)
4644
            s->rip_offset = insn_const_size(ot);
4645
        else if (b == 0x6b)
4646
            s->rip_offset = 1;
4647
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
4648
        if (b == 0x69) {
4649
            val = insn_get(s, ot);
4650
            gen_op_movl_T1_im(val);
4651
        } else if (b == 0x6b) {
4652
            val = (int8_t)insn_get(s, OT_BYTE);
4653
            gen_op_movl_T1_im(val);
4654
        } else {
4655
            gen_op_mov_TN_reg(ot, 1, reg);
4656
        }
4657

    
4658
#ifdef TARGET_X86_64
4659
        if (ot == OT_QUAD) {
4660
            gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
4661
        } else
4662
#endif
4663
        if (ot == OT_LONG) {
4664
#ifdef TARGET_X86_64
4665
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
4666
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
4667
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4668
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4669
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
4670
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4671
#else
4672
                {
4673
                    TCGv_i64 t0, t1;
4674
                    t0 = tcg_temp_new_i64();
4675
                    t1 = tcg_temp_new_i64();
4676
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
4677
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
4678
                    tcg_gen_mul_i64(t0, t0, t1);
4679
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
4680
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4681
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
4682
                    tcg_gen_shri_i64(t0, t0, 32);
4683
                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
4684
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
4685
                }
4686
#endif
4687
        } else {
4688
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4689
            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
4690
            /* XXX: use 32 bit mul which could be faster */
4691
            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4692
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4693
            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
4694
            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4695
        }
4696
        gen_op_mov_reg_T0(ot, reg);
4697
        s->cc_op = CC_OP_MULB + ot;
4698
        break;
4699
    case 0x1c0:
4700
    case 0x1c1: /* xadd Ev, Gv */
4701
        if ((b & 1) == 0)
4702
            ot = OT_BYTE;
4703
        else
4704
            ot = dflag + OT_WORD;
4705
        modrm = ldub_code(s->pc++);
4706
        reg = ((modrm >> 3) & 7) | rex_r;
4707
        mod = (modrm >> 6) & 3;
4708
        if (mod == 3) {
4709
            rm = (modrm & 7) | REX_B(s);
4710
            gen_op_mov_TN_reg(ot, 0, reg);
4711
            gen_op_mov_TN_reg(ot, 1, rm);
4712
            gen_op_addl_T0_T1();
4713
            gen_op_mov_reg_T1(ot, reg);
4714
            gen_op_mov_reg_T0(ot, rm);
4715
        } else {
4716
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4717
            gen_op_mov_TN_reg(ot, 0, reg);
4718
            gen_op_ld_T1_A0(ot + s->mem_index);
4719
            gen_op_addl_T0_T1();
4720
            gen_op_st_T0_A0(ot + s->mem_index);
4721
            gen_op_mov_reg_T1(ot, reg);
4722
        }
4723
        gen_op_update2_cc();
4724
        s->cc_op = CC_OP_ADDB + ot;
4725
        break;
4726
    case 0x1b0:
4727
    case 0x1b1: /* cmpxchg Ev, Gv */
4728
        {
4729
            int label1, label2;
4730
            TCGv t0, t1, t2, a0;
4731

    
4732
            if ((b & 1) == 0)
4733
                ot = OT_BYTE;
4734
            else
4735
                ot = dflag + OT_WORD;
4736
            modrm = ldub_code(s->pc++);
4737
            reg = ((modrm >> 3) & 7) | rex_r;
4738
            mod = (modrm >> 6) & 3;
4739
            t0 = tcg_temp_local_new();
4740
            t1 = tcg_temp_local_new();
4741
            t2 = tcg_temp_local_new();
4742
            a0 = tcg_temp_local_new();
4743
            gen_op_mov_v_reg(ot, t1, reg);
4744
            if (mod == 3) {
4745
                rm = (modrm & 7) | REX_B(s);
4746
                gen_op_mov_v_reg(ot, t0, rm);
4747
            } else {
4748
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4749
                tcg_gen_mov_tl(a0, cpu_A0);
4750
                gen_op_ld_v(ot + s->mem_index, t0, a0);
4751
                rm = 0; /* avoid warning */
4752
            }
4753
            label1 = gen_new_label();
4754
            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
4755
            tcg_gen_sub_tl(t2, t2, t0);
4756
            gen_extu(ot, t2);
4757
            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
4758
            if (mod == 3) {
4759
                label2 = gen_new_label();
4760
                gen_op_mov_reg_v(ot, R_EAX, t0);
4761
                tcg_gen_br(label2);
4762
                gen_set_label(label1);
4763
                gen_op_mov_reg_v(ot, rm, t1);
4764
                gen_set_label(label2);
4765
            } else {
4766
                tcg_gen_mov_tl(t1, t0);
4767
                gen_op_mov_reg_v(ot, R_EAX, t0);
4768
                gen_set_label(label1);
4769
                /* always store */
4770
                gen_op_st_v(ot + s->mem_index, t1, a0);
4771
            }
4772
            tcg_gen_mov_tl(cpu_cc_src, t0);
4773
            tcg_gen_mov_tl(cpu_cc_dst, t2);
4774
            s->cc_op = CC_OP_SUBB + ot;
4775
            tcg_temp_free(t0);
4776
            tcg_temp_free(t1);
4777
            tcg_temp_free(t2);
4778
            tcg_temp_free(a0);
4779
        }
4780
        break;
4781
    case 0x1c7: /* cmpxchg8b */
4782
        modrm = ldub_code(s->pc++);
4783
        mod = (modrm >> 6) & 3;
4784
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
4785
            goto illegal_op;
4786
#ifdef TARGET_X86_64
4787
        if (dflag == 2) {
4788
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
4789
                goto illegal_op;
4790
            gen_jmp_im(pc_start - s->cs_base);
4791
            if (s->cc_op != CC_OP_DYNAMIC)
4792
                gen_op_set_cc_op(s->cc_op);
4793
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4794
            gen_helper_cmpxchg16b(cpu_A0);
4795
        } else
4796
#endif        
4797
        {
4798
            if (!(s->cpuid_features & CPUID_CX8))
4799
                goto illegal_op;
4800
            gen_jmp_im(pc_start - s->cs_base);
4801
            if (s->cc_op != CC_OP_DYNAMIC)
4802
                gen_op_set_cc_op(s->cc_op);
4803
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4804
            gen_helper_cmpxchg8b(cpu_A0);
4805
        }
4806
        s->cc_op = CC_OP_EFLAGS;
4807
        break;
4808

    
4809
        /**************************/
4810
        /* push/pop */
4811
    case 0x50 ... 0x57: /* push */
4812
        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
4813
        gen_push_T0(s);
4814
        break;
4815
    case 0x58 ... 0x5f: /* pop */
4816
        if (CODE64(s)) {
4817
            ot = dflag ? OT_QUAD : OT_WORD;
4818
        } else {
4819
            ot = dflag + OT_WORD;
4820
        }
4821
        gen_pop_T0(s);
4822
        /* NOTE: order is important for pop %sp */
4823
        gen_pop_update(s);
4824
        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
4825
        break;
4826
    case 0x60: /* pusha */
4827
        if (CODE64(s))
4828
            goto illegal_op;
4829
        gen_pusha(s);
4830
        break;
4831
    case 0x61: /* popa */
4832
        if (CODE64(s))
4833
            goto illegal_op;
4834
        gen_popa(s);
4835
        break;
4836
    case 0x68: /* push Iv */
4837
    case 0x6a:
4838
        if (CODE64(s)) {
4839
            ot = dflag ? OT_QUAD : OT_WORD;
4840
        } else {
4841
            ot = dflag + OT_WORD;
4842
        }
4843
        if (b == 0x68)
4844
            val = insn_get(s, ot);
4845
        else
4846
            val = (int8_t)insn_get(s, OT_BYTE);
4847
        gen_op_movl_T0_im(val);
4848
        gen_push_T0(s);
4849
        break;
4850
    case 0x8f: /* pop Ev */
4851
        if (CODE64(s)) {
4852
            ot = dflag ? OT_QUAD : OT_WORD;
4853
        } else {
4854
            ot = dflag + OT_WORD;
4855
        }
4856
        modrm = ldub_code(s->pc++);
4857
        mod = (modrm >> 6) & 3;
4858
        gen_pop_T0(s);
4859
        if (mod == 3) {
4860
            /* NOTE: order is important for pop %sp */
4861
            gen_pop_update(s);
4862
            rm = (modrm & 7) | REX_B(s);
4863
            gen_op_mov_reg_T0(ot, rm);
4864
        } else {
4865
            /* NOTE: order is important too for MMU exceptions */
4866
            s->popl_esp_hack = 1 << ot;
4867
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
4868
            s->popl_esp_hack = 0;
4869
            gen_pop_update(s);
4870
        }
4871
        break;
4872
    case 0xc8: /* enter */
4873
        {
4874
            int level;
4875
            val = lduw_code(s->pc);
4876
            s->pc += 2;
4877
            level = ldub_code(s->pc++);
4878
            gen_enter(s, val, level);
4879
        }
4880
        break;
4881
    case 0xc9: /* leave */
4882
        /* XXX: exception not precise (ESP is updated before potential exception) */
4883
        if (CODE64(s)) {
4884
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
4885
            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
4886
        } else if (s->ss32) {
4887
            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
4888
            gen_op_mov_reg_T0(OT_LONG, R_ESP);
4889
        } else {
4890
            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
4891
            gen_op_mov_reg_T0(OT_WORD, R_ESP);
4892
        }
4893
        gen_pop_T0(s);
4894
        if (CODE64(s)) {
4895
            ot = dflag ? OT_QUAD : OT_WORD;
4896
        } else {
4897
            ot = dflag + OT_WORD;
4898
        }
4899
        gen_op_mov_reg_T0(ot, R_EBP);
4900
        gen_pop_update(s);
4901
        break;
4902
    case 0x06: /* push es */
4903
    case 0x0e: /* push cs */
4904
    case 0x16: /* push ss */
4905
    case 0x1e: /* push ds */
4906
        if (CODE64(s))
4907
            goto illegal_op;
4908
        gen_op_movl_T0_seg(b >> 3);
4909
        gen_push_T0(s);
4910
        break;
4911
    case 0x1a0: /* push fs */
4912
    case 0x1a8: /* push gs */
4913
        gen_op_movl_T0_seg((b >> 3) & 7);
4914
        gen_push_T0(s);
4915
        break;
4916
    case 0x07: /* pop es */
4917
    case 0x17: /* pop ss */
4918
    case 0x1f: /* pop ds */
4919
        if (CODE64(s))
4920
            goto illegal_op;
4921
        reg = b >> 3;
4922
        gen_pop_T0(s);
4923
        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
4924
        gen_pop_update(s);
4925
        if (reg == R_SS) {
4926
            /* if reg == SS, inhibit interrupts/trace. */
4927
            /* If several instructions disable interrupts, only the