
root / target-i386 / translate.c @ 72cf2d4f

/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>

#include "cpu.h"
#include "exec-all.h"
#include "disas.h"
#include "tcg-op.h"

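/* helper.h is included twice on purpose: the first include declares the
   helper prototypes, while the second, with GEN_HELPER defined, expands
   the same definitions into the gen_helper_* wrappers that emit TCG
   calls to those helpers. */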
#include "helper.h"
#define GEN_HELPER 1
#include "helper.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10

#ifdef TARGET_X86_64
#define X86_64_ONLY(x) x
#define X86_64_DEF(...)  __VA_ARGS__
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
#if 1
#define BUGGY_64(x) NULL
#endif
#else
#define X86_64_ONLY(x) NULL
#define X86_64_DEF(...)
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
static TCGv_i32 cpu_cc_op;
/* local temps */
static TCGv cpu_T[2], cpu_T3;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;
static TCGv cpu_tmp5, cpu_tmp6;

#include "gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    int aflag, dflag;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int ss32;   /* 32 bit stack segment */
    int cc_op;  /* current CC operation */
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

/* operand size */
enum {
    OT_BYTE = 0,
    OT_WORD,
    OT_LONG,
    OT_QUAD,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

static inline void gen_op_movl_T0_0(void)
{
    tcg_gen_movi_tl(cpu_T[0], 0);
}

static inline void gen_op_movl_T0_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T0_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T1_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_T1_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_A0_im(uint32_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_im(int64_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}
#endif

static inline void gen_movtl_T0_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_movtl_T1_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_andl_T0_ffff(void)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
}

static inline void gen_op_andl_T0_im(uint32_t val)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
}

static inline void gen_op_movl_T0_T1(void)
{
    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
}

static inline void gen_op_andl_A0_ffff(void)
{
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
}

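/* Number of operand sizes the generic code paths iterate over: byte,
   word and long, plus quad when the target is x86-64 (mirroring the
   OT_* enum above). */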
#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

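/* Store t0 into integer register 'reg' with the given operand size.
   For byte operands, registers 4..7 normally denote the high-byte
   registers AH..BH; when REX prefixes are in use (x86_64_hregs) they
   denote SPL..DIL instead.  In long mode, a 32 bit store also clears
   the high half of the destination register, as the architecture
   requires. */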
static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
        } else {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    case OT_WORD:
        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case OT_QUAD:
        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}

static inline void gen_op_mov_reg_T0(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
}

static inline void gen_op_mov_reg_T1(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
}

static inline void gen_op_mov_reg_A0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case 2:
        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}

static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            goto std_case;
        } else {
            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    default:
    std_case:
        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
    }
}

static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
{
    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
}

static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif

static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}

static inline void gen_op_addl_T0_T1(void)
{
    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}

static inline void gen_op_jmp_T0(void)
{
    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
}

static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_add_reg_T0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_set_cc_op(int32_t val)
{
    tcg_gen_movi_i32(cpu_cc_op, val);
}

static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
}

static inline void gen_op_addq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}

static inline void gen_op_movq_A0_reg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
}

static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
#endif

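/* In the load/store helpers below, the low two bits of 'idx' select the
   operand size (OT_*) and the remaining bits carry the MMU index biased
   by one, recovered as "(idx >> 2) - 1"; s->mem_index is kept in this
   pre-shifted form so that callers can simply pass "ot + s->mem_index". */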
static inline void gen_op_lds_T0_A0(int idx)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
        break;
    default:
    case 2:
        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
        break;
    }
}

static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8u(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16u(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_ld32u(t0, a0, mem_index);
        break;
    default:
    case 3:
        /* Should never happen on 32-bit targets.  */
#ifdef TARGET_X86_64
        tcg_gen_qemu_ld64(t0, a0, mem_index);
#endif
        break;
    }
}

/* XXX: always use ldu or lds */
static inline void gen_op_ld_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ldu_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ld_T1_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_st8(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_st16(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_st32(t0, a0, mem_index);
        break;
    default:
    case 3:
        /* Should never happen on 32-bit targets.  */
#ifdef TARGET_X86_64
        tcg_gen_qemu_st64(t0, a0, mem_index);
#endif
        break;
    }
}

static inline void gen_op_st_T0_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_st_T1_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
    } else
#endif
    if (s->aflag) {
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
    } else {
        /* 16 bit address, always override */
        if (override < 0)
            override = R_DS;
        gen_op_movl_A0_reg(R_ESI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(override);
    }
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        gen_op_movq_A0_reg(R_EDI);
    } else
#endif
    if (s->aflag) {
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
    } else {
        gen_op_movl_A0_reg(R_EDI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(R_ES);
    }
}

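/* Load into T0 the per-iteration increment for string instructions:
   'df' holds +1 or -1 according to the direction flag, and shifting it
   left by 'ot' scales that to +/- the operand size in bytes. */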
static inline void gen_op_movl_T0_Dshift(int ot)
{
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
}

static void gen_extu(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8u_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16u_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32u_tl(reg, reg);
        break;
    default:
        break;
    }
}

static void gen_exts(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8s_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16s_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32s_tl(reg, reg);
        break;
    default:
        break;
    }
}

static inline void gen_op_jnz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case 0: gen_helper_inb(v, n); break;
    case 1: gen_helper_inw(v, n); break;
    case 2: gen_helper_inl(v, n); break;
    }
}

static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case 0: gen_helper_outb(v, n); break;
    case 1: gen_helper_outw(v, n); break;
    case 2: gen_helper_outl(v, n); break;
    }
}

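/* Generate the permission checks for I/O instructions.  In protected
   mode with CPL > IOPL (or in vm86 mode) the check_io* helpers validate
   the port against the TSS I/O permission bitmap; when running under
   SVM, an I/O intercept check is generated as well. */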
static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    int state_saved;
    target_ulong next_eip;

    state_saved = 0;
    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        state_saved = 1;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        switch (ot) {
        case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
        case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
        case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        if (!state_saved) {
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_jmp_im(cur_eip);
            state_saved = 1;
        }
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
}

static void gen_op_update1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static inline void gen_op_cmpl_T0_T1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

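/* Lazy condition codes: rather than materializing EFLAGS after every
   instruction, the translator records the last flag-setting operation
   in cpu_cc_op and its operands in cpu_cc_dst/cpu_cc_src; the helpers
   below recompute the flags only when they are actually needed. */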
/* compute eflags.C to reg */
static void gen_compute_eflags_c(TCGv reg)
{
    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

/* compute all eflags to reg */
static void gen_compute_eflags(TCGv reg)
{
    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    switch(jcc_op) {
    case JCC_O:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_B:
        gen_compute_eflags_c(cpu_T[0]);
        break;
    case JCC_Z:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_BE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_S:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_P:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_L:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    default:
    case JCC_LE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    }
}

/* return true if setcc_slow is not needed (WARNING: must be kept in
   sync with gen_jcc1) */
static int is_fast_jcc_case(DisasContext *s, int b)
{
    int jcc_op;
    jcc_op = (b >> 1) & 7;
    switch(s->cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:
        if (jcc_op == JCC_O || jcc_op == JCC_P)
            goto slow_jcc;
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:
        if (jcc_op != JCC_Z && jcc_op != JCC_S)
            goto slow_jcc;
        break;
    default:
    slow_jcc:
        return 0;
    }
    return 1;
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
{
    int inv, jcc_op, size, cond;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch(cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:

        size = cc_op - CC_OP_SUBB;
        switch(jcc_op) {
        case JCC_Z:
        fast_jcc_z:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
                t0 = cpu_tmp0;
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
                t0 = cpu_tmp0;
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
                t0 = cpu_tmp0;
                break;
#endif
            default:
                t0 = cpu_cc_dst;
                break;
            }
            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
            break;
        case JCC_S:
        fast_jcc_s:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#endif
            default:
                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
                                   0, l1);
                break;
            }
            break;

        case JCC_B:
            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
            goto fast_jcc_b;
        case JCC_BE:
            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
        fast_jcc_b:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        case JCC_L:
            cond = inv ? TCG_COND_GE : TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = inv ? TCG_COND_GT : TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext8s_tl(t0, cpu_cc_src);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext16s_tl(t0, cpu_cc_src);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext32s_tl(t0, cpu_cc_src);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        default:
            goto slow_jcc;
        }
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_ADCB:
    case CC_OP_ADCW:
    case CC_OP_ADCL:
    case CC_OP_ADCQ:

    case CC_OP_SBBB:
    case CC_OP_SBBW:
    case CC_OP_SBBL:
    case CC_OP_SBBQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:

    case CC_OP_SARB:
    case CC_OP_SARW:
    case CC_OP_SARL:
    case CC_OP_SARQ:
        switch(jcc_op) {
        case JCC_Z:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_z;
        case JCC_S:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_s;
        default:
            goto slow_jcc;
        }
        break;
    default:
    slow_jcc:
        gen_setcc_slow_T0(s, jcc_op);
        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
                           cpu_T[0], 0, l1);
        break;
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    int l1, l2;

    l1 = gen_new_label();
    l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_op_mov_reg_T0(ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_ins(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    gen_op_movl_T0_0();
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    if (use_icount)
        gen_io_end();
}

static inline void gen_outs(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);

    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);

    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    if (use_icount)
        gen_io_end();
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

1267
GEN_REPZ(stos)
1268
GEN_REPZ(lods)
1269
GEN_REPZ(ins)
1270
GEN_REPZ(outs)
1271
GEN_REPZ2(scas)
1272
GEN_REPZ2(cmps)
1273

    
1274
static void gen_helper_fp_arith_ST0_FT0(int op)
1275
{
1276
    switch (op) {
1277
    case 0: gen_helper_fadd_ST0_FT0(); break;
1278
    case 1: gen_helper_fmul_ST0_FT0(); break;
1279
    case 2: gen_helper_fcom_ST0_FT0(); break;
1280
    case 3: gen_helper_fcom_ST0_FT0(); break;
1281
    case 4: gen_helper_fsub_ST0_FT0(); break;
1282
    case 5: gen_helper_fsubr_ST0_FT0(); break;
1283
    case 6: gen_helper_fdiv_ST0_FT0(); break;
1284
    case 7: gen_helper_fdivr_ST0_FT0(); break;
1285
    }
1286
}
1287

    
1288
/* NOTE the exception in "r" op ordering */
1289
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1290
{
1291
    TCGv_i32 tmp = tcg_const_i32(opreg);
1292
    switch (op) {
1293
    case 0: gen_helper_fadd_STN_ST0(tmp); break;
1294
    case 1: gen_helper_fmul_STN_ST0(tmp); break;
1295
    case 4: gen_helper_fsubr_STN_ST0(tmp); break;
1296
    case 5: gen_helper_fsub_STN_ST0(tmp); break;
1297
    case 6: gen_helper_fdivr_STN_ST0(tmp); break;
1298
    case 7: gen_helper_fdiv_STN_ST0(tmp); break;
1299
    }
1300
}
1301

    
1302
/* if d == OR_TMP0, it means memory operand (address in A0) */
1303
static void gen_op(DisasContext *s1, int op, int ot, int d)
1304
{
1305
    if (d != OR_TMP0) {
1306
        gen_op_mov_TN_reg(ot, 0, d);
1307
    } else {
1308
        gen_op_ld_T0_A0(ot + s1->mem_index);
1309
    }
1310
    switch(op) {
1311
    case OP_ADCL:
1312
        if (s1->cc_op != CC_OP_DYNAMIC)
1313
            gen_op_set_cc_op(s1->cc_op);
1314
        gen_compute_eflags_c(cpu_tmp4);
1315
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1316
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
1317
        if (d != OR_TMP0)
1318
            gen_op_mov_reg_T0(ot, d);
1319
        else
1320
            gen_op_st_T0_A0(ot + s1->mem_index);
1321
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
1322
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1323
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
1324
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
1325
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
1326
        s1->cc_op = CC_OP_DYNAMIC;
1327
        break;
1328
    case OP_SBBL:
1329
        if (s1->cc_op != CC_OP_DYNAMIC)
1330
            gen_op_set_cc_op(s1->cc_op);
1331
        gen_compute_eflags_c(cpu_tmp4);
1332
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1333
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
1334
        if (d != OR_TMP0)
1335
            gen_op_mov_reg_T0(ot, d);
1336
        else
1337
            gen_op_st_T0_A0(ot + s1->mem_index);
1338
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
1339
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1340
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
1341
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
1342
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
1343
        s1->cc_op = CC_OP_DYNAMIC;
1344
        break;
1345
    case OP_ADDL:
1346
        gen_op_addl_T0_T1();
1347
        if (d != OR_TMP0)
1348
            gen_op_mov_reg_T0(ot, d);
1349
        else
1350
            gen_op_st_T0_A0(ot + s1->mem_index);
1351
        gen_op_update2_cc();
1352
        s1->cc_op = CC_OP_ADDB + ot;
1353
        break;
1354
    case OP_SUBL:
1355
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1356
        if (d != OR_TMP0)
1357
            gen_op_mov_reg_T0(ot, d);
1358
        else
1359
            gen_op_st_T0_A0(ot + s1->mem_index);
1360
        gen_op_update2_cc();
1361
        s1->cc_op = CC_OP_SUBB + ot;
1362
        break;
1363
    default:
1364
    case OP_ANDL:
1365
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1366
        if (d != OR_TMP0)
1367
            gen_op_mov_reg_T0(ot, d);
1368
        else
1369
            gen_op_st_T0_A0(ot + s1->mem_index);
1370
        gen_op_update1_cc();
1371
        s1->cc_op = CC_OP_LOGICB + ot;
1372
        break;
1373
    case OP_ORL:
1374
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1375
        if (d != OR_TMP0)
1376
            gen_op_mov_reg_T0(ot, d);
1377
        else
1378
            gen_op_st_T0_A0(ot + s1->mem_index);
1379
        gen_op_update1_cc();
1380
        s1->cc_op = CC_OP_LOGICB + ot;
1381
        break;
1382
    case OP_XORL:
1383
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1384
        if (d != OR_TMP0)
1385
            gen_op_mov_reg_T0(ot, d);
1386
        else
1387
            gen_op_st_T0_A0(ot + s1->mem_index);
1388
        gen_op_update1_cc();
1389
        s1->cc_op = CC_OP_LOGICB + ot;
1390
        break;
1391
    case OP_CMPL:
1392
        gen_op_cmpl_T0_T1_cc();
1393
        s1->cc_op = CC_OP_SUBB + ot;
1394
        break;
1395
    }
1396
}
1397

    
1398
/* if d == OR_TMP0, it means memory operand (address in A0) */
1399
static void gen_inc(DisasContext *s1, int ot, int d, int c)
1400
{
1401
    if (d != OR_TMP0)
1402
        gen_op_mov_TN_reg(ot, 0, d);
1403
    else
1404
        gen_op_ld_T0_A0(ot + s1->mem_index);
1405
    if (s1->cc_op != CC_OP_DYNAMIC)
1406
        gen_op_set_cc_op(s1->cc_op);
1407
    if (c > 0) {
1408
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
1409
        s1->cc_op = CC_OP_INCB + ot;
1410
    } else {
1411
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
1412
        s1->cc_op = CC_OP_DECB + ot;
1413
    }
1414
    if (d != OR_TMP0)
1415
        gen_op_mov_reg_T0(ot, d);
1416
    else
1417
        gen_op_st_T0_A0(ot + s1->mem_index);
1418
    gen_compute_eflags_c(cpu_cc_src);
1419
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1420
}
1421

    
1422
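/* Generate a variable-count shift (SHL/SHR/SAR) of the operand by T1.
   A count of zero must leave EFLAGS untouched, so the value shifted by
   count - 1 is kept in T3 and the flags update below is made
   conditional at translated-code run time on the count being non
   zero. */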
static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask;
    int shift_label;
    TCGv t0, t1;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);
            tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags if non zero shift */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* XXX: inefficient */
    t0 = tcg_temp_local_new();
    t1 = tcg_temp_local_new();

    tcg_gen_mov_tl(t0, cpu_T[0]);
    tcg_gen_mov_tl(t1, cpu_T3);

    shift_label = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right)
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    else
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);

    gen_set_label(shift_label);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
}

static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T[0]);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                gen_extu(ot, cpu_T[0]);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
        }
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        if (is_right)
            s->cc_op = CC_OP_SARB + ot;
        else
            s->cc_op = CC_OP_SHLB + ot;
    }
}

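/* Shift 'arg1' left by 'arg2' when it is positive and right by '-arg2'
   when it is negative; used below to move a computed flag bit into its
   EFLAGS position whatever the operand size. */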
static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
{
    if (arg2 >= 0)
        tcg_gen_shli_tl(ret, arg1, arg2);
    else
        tcg_gen_shri_tl(ret, arg1, -arg2);
}

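/* Rotate the operand left or right by the count in T1.  Rotates update
   only CF and OF, and a masked count of zero leaves EFLAGS completely
   unchanged, hence the run time branches around both the rotation and
   the flags update. */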
static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, 
1553
                          int is_right)
1554
{
1555
    target_ulong mask;
1556
    int label1, label2, data_bits;
1557
    TCGv t0, t1, t2, a0;
1558

    
1559
    /* XXX: inefficient, but we must use local temps */
1560
    t0 = tcg_temp_local_new();
1561
    t1 = tcg_temp_local_new();
1562
    t2 = tcg_temp_local_new();
1563
    a0 = tcg_temp_local_new();
1564

    
1565
    if (ot == OT_QUAD)
1566
        mask = 0x3f;
1567
    else
1568
        mask = 0x1f;
1569

    
1570
    /* load */
1571
    if (op1 == OR_TMP0) {
1572
        tcg_gen_mov_tl(a0, cpu_A0);
1573
        gen_op_ld_v(ot + s->mem_index, t0, a0);
1574
    } else {
1575
        gen_op_mov_v_reg(ot, t0, op1);
1576
    }
1577

    
1578
    tcg_gen_mov_tl(t1, cpu_T[1]);
1579

    
1580
    tcg_gen_andi_tl(t1, t1, mask);
1581

    
1582
    /* Must test zero case to avoid using undefined behaviour in TCG
1583
       shifts. */
1584
    label1 = gen_new_label();
1585
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
1586
    
1587
    if (ot <= OT_WORD)
1588
        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
1589
    else
1590
        tcg_gen_mov_tl(cpu_tmp0, t1);
1591
    
1592
    gen_extu(ot, t0);
1593
    tcg_gen_mov_tl(t2, t0);
1594

    
1595
    data_bits = 8 << ot;
1596
    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
1597
       fix TCG definition) */
1598
    if (is_right) {
1599
        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
1600
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
1601
        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
1602
    } else {
1603
        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
1604
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
1605
        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
1606
    }
1607
    tcg_gen_or_tl(t0, t0, cpu_tmp4);
1608

    
1609
    gen_set_label(label1);
1610
    /* store */
1611
    if (op1 == OR_TMP0) {
1612
        gen_op_st_v(ot + s->mem_index, t0, a0);
1613
    } else {
1614
        gen_op_mov_reg_v(ot, op1, t0);
1615
    }
1616
    
1617
    /* update eflags */
1618
    if (s->cc_op != CC_OP_DYNAMIC)
1619
        gen_op_set_cc_op(s->cc_op);
1620

    
1621
    label2 = gen_new_label();
1622
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
1623

    
1624
    gen_compute_eflags(cpu_cc_src);
1625
    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
1626
    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
1627
    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
1628
    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
1629
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
1630
    if (is_right) {
1631
        tcg_gen_shri_tl(t0, t0, data_bits - 1);
1632
    }
1633
    tcg_gen_andi_tl(t0, t0, CC_C);
1634
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
1635
    
1636
    tcg_gen_discard_tl(cpu_cc_dst);
1637
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
1638
        
1639
    gen_set_label(label2);
1640
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
1641

    
1642
    tcg_temp_free(t0);
1643
    tcg_temp_free(t1);
1644
    tcg_temp_free(t2);
1645
    tcg_temp_free(a0);
1646
}
1647

    
1648
static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
1649
                          int is_right)
1650
{
1651
    int mask;
1652
    int data_bits;
1653
    TCGv t0, t1, a0;
1654

    
1655
    /* XXX: inefficient, but we must use local temps */
1656
    t0 = tcg_temp_local_new();
1657
    t1 = tcg_temp_local_new();
1658
    a0 = tcg_temp_local_new();
1659

    
1660
    if (ot == OT_QUAD)
1661
        mask = 0x3f;
1662
    else
1663
        mask = 0x1f;
1664

    
1665
    /* load */
1666
    if (op1 == OR_TMP0) {
1667
        tcg_gen_mov_tl(a0, cpu_A0);
1668
        gen_op_ld_v(ot + s->mem_index, t0, a0);
1669
    } else {
1670
        gen_op_mov_v_reg(ot, t0, op1);
1671
    }
1672

    
1673
    gen_extu(ot, t0);
1674
    tcg_gen_mov_tl(t1, t0);
1675

    
1676
    op2 &= mask;
1677
    data_bits = 8 << ot;
1678
    if (op2 != 0) {
1679
        int shift = op2 & ((1 << (3 + ot)) - 1);
1680
        if (is_right) {
1681
            tcg_gen_shri_tl(cpu_tmp4, t0, shift);
1682
            tcg_gen_shli_tl(t0, t0, data_bits - shift);
1683
        }
1684
        else {
1685
            tcg_gen_shli_tl(cpu_tmp4, t0, shift);
1686
            tcg_gen_shri_tl(t0, t0, data_bits - shift);
1687
        }
1688
        tcg_gen_or_tl(t0, t0, cpu_tmp4);
1689
    }
1690

    
1691
    /* store */
1692
    if (op1 == OR_TMP0) {
1693
        gen_op_st_v(ot + s->mem_index, t0, a0);
1694
    } else {
1695
        gen_op_mov_reg_v(ot, op1, t0);
1696
    }
1697

    
1698
    if (op2 != 0) {
1699
        /* update eflags */
1700
        if (s->cc_op != CC_OP_DYNAMIC)
1701
            gen_op_set_cc_op(s->cc_op);
1702

    
1703
        gen_compute_eflags(cpu_cc_src);
1704
        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
1705
        tcg_gen_xor_tl(cpu_tmp0, t1, t0);
1706
        tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
1707
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
1708
        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
1709
        if (is_right) {
1710
            tcg_gen_shri_tl(t0, t0, data_bits - 1);
1711
        }
1712
        tcg_gen_andi_tl(t0, t0, CC_C);
1713
        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
1714

    
1715
        tcg_gen_discard_tl(cpu_cc_dst);
1716
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
1717
        s->cc_op = CC_OP_EFLAGS;
1718
    }
1719

    
1720
    tcg_temp_free(t0);
1721
    tcg_temp_free(t1);
1722
    tcg_temp_free(a0);
1723
}
1724

    
1725
/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
                           int is_right)
{
    int label1;

    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    if (is_right) {
        switch (ot) {
        case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
        case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
        case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
#ifdef TARGET_X86_64
        case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
#endif
        }
    } else {
        switch (ot) {
        case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
        case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
        case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
#ifdef TARGET_X86_64
        case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
#endif
        }
    }
    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);

    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);

    gen_set_label(label1);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}

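/* generate code for SHLD/SHRD: T1 holds the second operand and T3
   the shift count. The count is masked first, and a count of zero
   must leave both the destination and the flags untouched. */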
/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
                                int is_right)
{
    int label1, label2, data_bits;
    target_ulong mask;
    TCGv t0, t1, t2, a0;

    t0 = tcg_temp_local_new();
    t1 = tcg_temp_local_new();
    t2 = tcg_temp_local_new();
    a0 = tcg_temp_local_new();

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);

    tcg_gen_mov_tl(t1, cpu_T[1]);
    tcg_gen_mov_tl(t2, cpu_T3);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);

    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
    if (ot == OT_WORD) {
        /* Note: we implement the Intel behaviour for shift count > 16 */
        if (is_right) {
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
            tcg_gen_or_tl(t0, t0, cpu_tmp0);
            tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);

            /* only needed if count > 16, but a test would complicate
               the code */
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);

            tcg_gen_shr_tl(t0, t0, t2);

            tcg_gen_or_tl(t0, t0, cpu_tmp0);
        } else {
            /* XXX: not optimal */
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t1, t1, t0);
            tcg_gen_ext32u_tl(t1, t1);

            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
            tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);

            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    } else {
        data_bits = 8 << ot;
        if (is_right) {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);

            tcg_gen_shr_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);

        } else {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t1, t1);

            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);

            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    }
    tcg_gen_mov_tl(t1, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }

    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right) {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    } else {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
    }
    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}

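/* generate a shift or rotate with a variable count: the count is
   loaded into T1 from register 's' unless s == OR_TMP1, in which
   case T1 already holds it */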
static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_TN_reg(ot, 1, s);
    switch(op) {
    case OP_ROL:
        gen_rot_rm_T1(s1, ot, d, 0);
        break;
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
        gen_rotc_rm_T1(s1, ot, d, 0);
        break;
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, 1);
        break;
    }
}

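/* same as gen_shift() but with an immediate count: the plain shifts
   and rotates have dedicated immediate code paths, while RCL/RCR
   fall back to the variable count path through T1 */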
static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
{
    switch(op) {
    case OP_ROL:
        gen_rot_rm_im(s1, ot, d, c, 0);
        break;
    case OP_ROR:
        gen_rot_rm_im(s1, ot, d, c, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        gen_op_movl_T1_im(c);
        gen_shift(s1, op, ot, d, OR_TMP1);
        break;
    }
}

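/* decode the ModRM/SIB bytes and any displacement, and leave the
   effective address in A0, adding the segment base when needed;
   handles 16 bit, 32 bit and 64 bit addressing modes */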
static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
{
    target_long disp;
    int havesib;
    int base;
    int index;
    int scale;
    int opreg;
    int mod, rm, code, override, must_add_seg;

    override = s->override;
    must_add_seg = s->addseg;
    if (override >= 0)
        must_add_seg = 1;
    mod = (modrm >> 6) & 3;
    rm = modrm & 7;

    if (s->aflag) {

        havesib = 0;
        base = rm;
        index = 0;
        scale = 0;

        if (base == 4) {
            havesib = 1;
            code = ldub_code(s->pc++);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            base = (code & 7);
        }
        base |= REX_B(s);

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                base = -1;
                disp = (int32_t)ldl_code(s->pc);
                s->pc += 4;
                if (CODE64(s) && !havesib) {
                    disp += s->pc + s->rip_offset;
                }
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = ldl_code(s->pc);
            s->pc += 4;
            break;
        }

        if (base >= 0) {
            /* for correct popl handling with esp */
            if (base == 4 && s->popl_esp_hack)
                disp += s->popl_esp_hack;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(base);
                if (disp != 0) {
                    gen_op_addq_A0_im(disp);
                }
            } else
#endif
            {
                gen_op_movl_A0_reg(base);
                if (disp != 0)
                    gen_op_addl_A0_im(disp);
            }
        } else {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_im(disp);
            } else
#endif
            {
                gen_op_movl_A0_im(disp);
            }
        }
        /* XXX: index == 4 is always invalid */
        if (havesib && (index != 4 || scale != 0)) {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_reg_sN(scale, index);
            } else
#endif
            {
                gen_op_addl_A0_reg_sN(scale, index);
            }
        }
        if (must_add_seg) {
            if (override < 0) {
                if (base == R_EBP || base == R_ESP)
                    override = R_SS;
                else
                    override = R_DS;
            }
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_seg(override);
            } else
#endif
            {
                gen_op_addl_A0_seg(override);
            }
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                disp = lduw_code(s->pc);
                s->pc += 2;
                gen_op_movl_A0_im(disp);
                rm = 0; /* avoid SS override */
                goto no_rm;
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = lduw_code(s->pc);
            s->pc += 2;
            break;
        }
        switch(rm) {
        case 0:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 1:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 2:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 3:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 4:
            gen_op_movl_A0_reg(R_ESI);
            break;
        case 5:
            gen_op_movl_A0_reg(R_EDI);
            break;
        case 6:
            gen_op_movl_A0_reg(R_EBP);
            break;
        default:
        case 7:
            gen_op_movl_A0_reg(R_EBX);
            break;
        }
        if (disp != 0)
            gen_op_addl_A0_im(disp);
        gen_op_andl_A0_ffff();
    no_rm:
        if (must_add_seg) {
            if (override < 0) {
                if (rm == 2 || rm == 3 || rm == 6)
                    override = R_SS;
                else
                    override = R_DS;
            }
            gen_op_addl_A0_seg(override);
        }
    }

    opreg = OR_A0;
    disp = 0;
    *reg_ptr = opreg;
    *offset_ptr = disp;
}

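/* skip over the memory operand of a ModRM byte without generating
   any code, advancing s->pc past the SIB byte and displacement;
   used for instructions executed as no-ops */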
static void gen_nop_modrm(DisasContext *s, int modrm)
{
    int mod, rm, base, code;

    mod = (modrm >> 6) & 3;
    if (mod == 3)
        return;
    rm = modrm & 7;

    if (s->aflag) {

        base = rm;

        if (base == 4) {
            code = ldub_code(s->pc++);
            base = (code & 7);
        }

        switch (mod) {
        case 0:
            if (base == 5) {
                s->pc += 4;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 4;
            break;
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                s->pc += 2;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 2;
            break;
        }
    }
}

/* used for LEA and MOV AX, mem */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    int override, must_add_seg;
    must_add_seg = s->addseg;
    override = R_DS;
    if (s->override >= 0) {
        override = s->override;
        must_add_seg = 1;
    }
    if (must_add_seg) {
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            gen_op_addq_A0_seg(override);
        } else
#endif
        {
            gen_op_addl_A0_seg(override);
        }
    }
}

/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
   OR_TMP0 */
static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
{
    int mod, rm, opreg, disp;

    mod = (modrm >> 6) & 3;
    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    } else {
        gen_lea_modrm(s, modrm, &opreg, &disp);
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_st_T0_A0(ot + s->mem_index);
        } else {
            gen_op_ld_T0_A0(ot + s->mem_index);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    }
}

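/* fetch an immediate operand of size 'ot' from the instruction
   stream and advance the decode pc past it */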
static inline uint32_t insn_get(DisasContext *s, int ot)
{
    uint32_t ret;

    switch(ot) {
    case OT_BYTE:
        ret = ldub_code(s->pc);
        s->pc++;
        break;
    case OT_WORD:
        ret = lduw_code(s->pc);
        s->pc += 2;
        break;
    default:
    case OT_LONG:
        ret = ldl_code(s->pc);
        s->pc += 4;
        break;
    }
    return ret;
}

static inline int insn_const_size(unsigned int ot)
{
    if (ot <= OT_LONG)
        return 1 << ot;
    else
        return 4;
}

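/* emit a jump to eip: direct block chaining is only used when the
   target lies on one of the (at most two) pages the current TB
   already covers, otherwise the block simply ends */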
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    TranslationBlock *tb;
    target_ulong pc;

    pc = s->cs_base + eip;
    tb = s->tb;
    /* NOTE: we handle the case where the TB spans two pages here */
    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(eip);
        tcg_gen_exit_tb((long)tb + tb_num);
    } else {
        /* jump to another page: currently not optimized */
        gen_jmp_im(eip);
        gen_eob(s);
    }
}

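/* generate a conditional jump to 'val' with fall through to
   'next_eip': with block chaining enabled both targets become
   goto_tb exits, otherwise eip is set on each path and the block
   ends */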
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    int l1, l2, cc_op;

    cc_op = s->cc_op;
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
        s->is_jmp = 3;
    } else {

        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_jmp_im(next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(val);
        gen_set_label(l2);
        gen_eob(s);
    }
}

static void gen_setcc(DisasContext *s, int b)
{
    int inv, jcc_op, l1;
    TCGv t0;

    if (is_fast_jcc_case(s, b)) {
        /* nominal case: we use a jump */
        /* XXX: make it faster by adding new instructions in TCG */
        t0 = tcg_temp_local_new();
        tcg_gen_movi_tl(t0, 0);
        l1 = gen_new_label();
        gen_jcc1(s, s->cc_op, b ^ 1, l1);
        tcg_gen_movi_tl(t0, 1);
        gen_set_label(l1);
        tcg_gen_mov_tl(cpu_T[0], t0);
        tcg_temp_free(t0);
    } else {
        /* slow case: it is more efficient not to generate a jump,
           although it is questionable whether this optimization is
           worth it */
        inv = b & 1;
        jcc_op = (b >> 1) & 7;
        gen_setcc_slow_T0(s, jcc_op);
        if (inv) {
            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
        }
    }
}

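/* read and write segment selectors; the vm86 variant also derives
   the segment base as selector << 4, as real mode requires */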
static inline void gen_op_movl_T0_seg(int seg_reg)
{
    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}

static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
    tcg_gen_st32_tl(cpu_T[0], cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
    tcg_gen_st_tl(cpu_T[0], cpu_env,
                  offsetof(CPUX86State,segs[seg_reg].base));
}

/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
{
    if (s->pe && !s->vm86) {
        /* XXX: optimize by finding processor state dynamically */
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
            s->is_jmp = 3;
    } else {
        gen_op_movl_seg_T0_vm(seg_reg);
        if (seg_reg == R_SS)
            s->is_jmp = 3;
    }
}

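/* SVM intercept checks: under SVM, selected instructions must be
   reported to the hypervisor before they execute; the common case
   (no SVM) is a single flag test */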
static inline int svm_is_rep(int prefixes)
{
    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
}

static inline void
gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
                              uint32_t type, uint64_t param)
{
    /* no SVM activated; fast case */
    if (likely(!(s->flags & HF_SVMI_MASK)))
        return;
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(pc_start - s->cs_base);
    gen_helper_svm_check_intercept_param(tcg_const_i32(type),
                                         tcg_const_i64(param));
}

static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}

static inline void gen_stack_update(DisasContext *s, int addend)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_add_reg_im(2, R_ESP, addend);
    } else
#endif
    if (s->ss32) {
        gen_op_add_reg_im(1, R_ESP, addend);
    } else {
        gen_op_add_reg_im(0, R_ESP, addend);
    }
}

/* generate a push. It depends on ss32, addseg and dflag */
static void gen_push_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T0_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                tcg_gen_mov_tl(cpu_T[1], cpu_A0);
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            tcg_gen_mov_tl(cpu_T[1], cpu_A0);
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
    }
}

/* generate a push. It depends on ss32, addseg and dflag */
/* slower version for T1, only used for call Ev */
static void gen_push_T1(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T1_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);

        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_stack_update(s, (-2) << s->dflag);
    }
}

/* two step pop is necessary for precise exceptions */
static void gen_pop_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (s->ss32) {
            if (s->addseg)
                gen_op_addl_A0_seg(R_SS);
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
    }
}

static void gen_pop_update(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s) && s->dflag) {
        gen_stack_update(s, 8);
    } else
#endif
    {
        gen_stack_update(s, 2 << s->dflag);
    }
}

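/* compute the current stack address in A0, keeping the raw
   (unsegmented) value in T1 for a later ESP update */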
static void gen_stack_A0(DisasContext *s)
{
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_pusha(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    gen_op_addl_A0_im(-16 << s->dflag);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0; i < 8; i++) {
        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
        gen_op_addl_A0_im(2 << s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_popa(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0; i < 8; i++) {
        /* ESP is not reloaded */
        if (i != 3) {
            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
        }
        gen_op_addl_A0_im(2 << s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

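/* generate the ENTER instruction: push EBP, optionally copy 'level'
   frame pointers via a helper, then update EBP and ESP */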
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    int ot, opsize;

    level &= 0x1f;
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        ot = s->dflag ? OT_QUAD : OT_WORD;
        opsize = 1 << ot;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addq_A0_im(-opsize);
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);

        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            gen_helper_enter64_level(tcg_const_i32(level),
                                     tcg_const_i32((ot == OT_QUAD)),
                                     cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
    } else
#endif
    {
        ot = s->dflag + OT_WORD;
        opsize = 2 << s->dflag;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addl_A0_im(-opsize);
        if (!s->ss32)
            gen_op_andl_A0_ffff();
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
        if (s->addseg)
            gen_op_addl_A0_seg(R_SS);
        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            gen_helper_enter_level(tcg_const_i32(level),
                                   tcg_const_i32(s->dflag),
                                   cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
    }
}

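/* raise an exception at cur_eip: the condition code state and eip
   are synchronized first so the handler sees a consistent CPU
   state */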
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    gen_helper_raise_exception(tcg_const_i32(trapno));
    s->is_jmp = 3;
}

/* an interrupt is different from an exception because of the
   privilege checks */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    gen_helper_raise_interrupt(tcg_const_i32(intno),
                               tcg_const_i32(next_eip - cur_eip));
    s->is_jmp = 3;
}

static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    gen_helper_debug();
    s->is_jmp = 3;
}

/* generate a generic end of block. Trace exception is also generated
   if needed */
static void gen_eob(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
        gen_helper_reset_inhibit_irq();
    }
    if (s->tb->flags & HF_RF_MASK) {
        gen_helper_reset_rf();
    }
    if (s->singlestep_enabled) {
        gen_helper_debug();
    } else if (s->tf) {
        gen_helper_single_step();
    } else {
        tcg_gen_exit_tb(0);
    }
    s->is_jmp = 3;
}

/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    if (s->jmp_opt) {
        if (s->cc_op != CC_OP_DYNAMIC) {
            gen_op_set_cc_op(s->cc_op);
            s->cc_op = CC_OP_DYNAMIC;
        }
        gen_goto_tb(s, tb_num, eip);
        s->is_jmp = 3;
    } else {
        gen_jmp_im(eip);
        gen_eob(s);
    }
}

static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}

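/* move 64 bit (MMX) and 128 bit (SSE) values between guest memory
   at A0 and the fpregs/xmm_regs fields of CPUX86State, or between
   env fields themselves */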
static inline void gen_ldq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
}

static inline void gen_stq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
}

static inline void gen_ldo_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
}

static inline void gen_sto_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
}

static inline void gen_op_movo(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
}

static inline void gen_op_movq(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

static inline void gen_op_movl(int d_offset, int s_offset)
{
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
}

static inline void gen_op_movq_env_0(int d_offset)
{
    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }

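/* dispatch table for the two byte (0x0f xx) opcode space, indexed
   by the opcode byte and then by the mandatory prefix (none, 0x66,
   0xf3, 0xf2). A NULL entry is an illegal encoding; SSE_SPECIAL
   entries are decoded by hand in gen_sse() */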
static void *sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },

    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0x70] = { gen_helper_pshufw_mmx,
               gen_helper_pshufd_xmm,
               gen_helper_pshufhw_xmm,
               gen_helper_pshuflw_xmm },
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = MMX_OP2(maskmov),
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};

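/* immediate form MMX/SSE shift group (0f 71/72/73), indexed by
   8 * (size group) + the ModRM reg field */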
static void *sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
};

static void *sse_op_table3[4 * 3] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd,
    X86_64_ONLY(gen_helper_cvtsq2ss),
    X86_64_ONLY(gen_helper_cvtsq2sd),

    gen_helper_cvttss2si,
    gen_helper_cvttsd2si,
    X86_64_ONLY(gen_helper_cvttss2sq),
    X86_64_ONLY(gen_helper_cvttsd2sq),

    gen_helper_cvtss2si,
    gen_helper_cvtsd2si,
    X86_64_ONLY(gen_helper_cvtss2sq),
    X86_64_ONLY(gen_helper_cvtsd2sq),
};

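/* comparison predicates for CMPPS/CMPPD/CMPSS/CMPSD, indexed by the
   low three bits of the immediate byte */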
static void *sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};

static void *sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = gen_helper_movq, /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq, /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
};

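/* three byte opcode tables (0f 38 and 0f 3a): each entry also names
   the CPUID feature bit that must be set for the encoding to be
   legal */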
struct sse_op_helper_s {
    void *op[2]; uint32_t ext_mask;
};
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
static struct sse_op_helper_s sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
};

static struct sse_op_helper_s sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
};

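/* decode and translate one MMX/SSE instruction: 'b' is the opcode
   byte following 0x0f, the mandatory prefix selects the column of
   sse_op_table1 and SSE_SPECIAL entries are handled case by case
   below */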
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
{
    int b1, op1_offset, op2_offset, is_xmm, val, ot;
    int modrm, mod, rm, reg, reg_addr, offset_addr;
    void *sse_op2;

    b &= 0xff;
    if (s->prefix & PREFIX_DATA)
        b1 = 1;
    else if (s->prefix & PREFIX_REPZ)
        b1 = 2;
    else if (s->prefix & PREFIX_REPNZ)
        b1 = 3;
    else
        b1 = 0;
    sse_op2 = sse_op_table1[b][b1];
    if (!sse_op2)
        goto illegal_op;
    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
        is_xmm = 1;
    } else {
        if (b1 == 0) {
            /* MMX case */
            is_xmm = 0;
        } else {
            is_xmm = 1;
        }
    }
    /* simple MMX/SSE operation */
    if (s->flags & HF_TS_MASK) {
        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
        return;
    }
    if (s->flags & HF_EM_MASK) {
    illegal_op:
        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
        return;
    }
    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
            goto illegal_op;
    if (b == 0x0e) {
        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
            goto illegal_op;
        /* femms */
        gen_helper_emms();
        return;
    }
    if (b == 0x77) {
        /* emms */
        gen_helper_emms();
        return;
    }
    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
       the static cpu state) */
    if (!is_xmm) {
        gen_helper_enter_mmx();
    }

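    /* from here on the ModRM byte is needed: 'reg' selects the
       MMX/XMM destination and mod/rm describe the source operand */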
    modrm = ldub_code(s->pc++);
3146
    reg = ((modrm >> 3) & 7);
3147
    if (is_xmm)
3148
        reg |= rex_r;
3149
    mod = (modrm >> 6) & 3;
3150
    if (sse_op2 == SSE_SPECIAL) {
3151
        b |= (b1 << 8);
3152
        switch(b) {
3153
        case 0x0e7: /* movntq */
3154
            if (mod == 3)
3155
                goto illegal_op;
3156
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3157
            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3158
            break;
3159
        case 0x1e7: /* movntdq */
3160
        case 0x02b: /* movntps */
3161
        case 0x12b: /* movntps */
3162
        case 0x3f0: /* lddqu */
3163
            if (mod == 3)
3164
                goto illegal_op;
3165
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3166
            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3167
            break;
3168
        case 0x6e: /* movd mm, ea */
3169
#ifdef TARGET_X86_64
3170
            if (s->dflag == 2) {
3171
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3172
                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3173
            } else
3174
#endif
3175
            {
3176
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3177
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3178
                                 offsetof(CPUX86State,fpregs[reg].mmx));
3179
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3180
                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3181
            }
3182
            break;
3183
        case 0x16e: /* movd xmm, ea */
3184
#ifdef TARGET_X86_64
3185
            if (s->dflag == 2) {
3186
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3187
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3188
                                 offsetof(CPUX86State,xmm_regs[reg]));
3189
                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
3190
            } else
3191
#endif
3192
            {
3193
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3194
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3195
                                 offsetof(CPUX86State,xmm_regs[reg]));
3196
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3197
                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3198
            }
3199
            break;
3200
        case 0x6f: /* movq mm, ea */
3201
            if (mod != 3) {
3202
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3203
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3204
            } else {
3205
                rm = (modrm & 7);
3206
                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3207
                               offsetof(CPUX86State,fpregs[rm].mmx));
3208
                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3209
                               offsetof(CPUX86State,fpregs[reg].mmx));
3210
            }
3211
            break;
3212
        case 0x010: /* movups */
3213
        case 0x110: /* movupd */
3214
        case 0x028: /* movaps */
3215
        case 0x128: /* movapd */
3216
        case 0x16f: /* movdqa xmm, ea */
3217
        case 0x26f: /* movdqu xmm, ea */
3218
            if (mod != 3) {
3219
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3220
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3221
            } else {
3222
                rm = (modrm & 7) | REX_B(s);
3223
                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3224
                            offsetof(CPUX86State,xmm_regs[rm]));
3225
            }
3226
            break;
3227
        case 0x210: /* movss xmm, ea */
3228
            if (mod != 3) {
3229
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3230
                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
3231
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3232
                gen_op_movl_T0_0();
3233
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3234
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3235
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3236
            } else {
3237
                rm = (modrm & 7) | REX_B(s);
3238
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3239
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3240
            }
3241
            break;
3242
        case 0x310: /* movsd xmm, ea */
3243
            if (mod != 3) {
3244
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3245
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3246
                gen_op_movl_T0_0();
3247
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3248
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3249
            } else {
3250
                rm = (modrm & 7) | REX_B(s);
3251
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3252
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3253
            }
3254
            break;
3255
        case 0x012: /* movlps */
3256
        case 0x112: /* movlpd */
3257
            if (mod != 3) {
3258
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3259
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3260
            } else {
3261
                /* movhlps */
3262
                rm = (modrm & 7) | REX_B(s);
3263
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3264
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3265
            }
3266
            break;
3267
        case 0x212: /* movsldup */
3268
            if (mod != 3) {
3269
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3270
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3271
            } else {
3272
                rm = (modrm & 7) | REX_B(s);
3273
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3274
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3275
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3276
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
3277
            }
3278
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3279
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3280
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3281
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3282
            break;
3283
        case 0x312: /* movddup */
3284
            if (mod != 3) {
3285
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3286
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3287
            } else {
3288
                rm = (modrm & 7) | REX_B(s);
3289
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3290
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3291
            }
3292
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3293
                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3294
            break;
3295
        case 0x016: /* movhps */
3296
        case 0x116: /* movhpd */
3297
            if (mod != 3) {
3298
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3299
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3300
            } else {
3301
                /* movlhps */
3302
                rm = (modrm & 7) | REX_B(s);
3303
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3304
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3305
            }
3306
            break;
3307
        case 0x216: /* movshdup */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
            }
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
            break;
        case 0x7e: /* movd ea, mm */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                               offsetof(CPUX86State,fpregs[reg].mmx));
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
            }
            break;
        case 0x17e: /* movd ea, xmm */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
            }
            break;
        case 0x27e: /* movq xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            }
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            break;
        case 0x7f: /* movq ea, mm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
            } else {
                rm = (modrm & 7);
                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
                            offsetof(CPUX86State,fpregs[reg].mmx));
            }
            break;
        case 0x011: /* movups */
        case 0x111: /* movupd */
        case 0x029: /* movaps */
        case 0x129: /* movapd */
        case 0x17f: /* movdqa ea, xmm */
        case 0x27f: /* movdqu ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
                            offsetof(CPUX86State,xmm_regs[reg]));
            }
            break;
        case 0x211: /* movss ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_op_st_T0_A0(OT_LONG + s->mem_index);
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
            }
            break;
        case 0x311: /* movsd ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            }
            break;
        case 0x013: /* movlps */
        case 0x113: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x017: /* movhps */
        case 0x117: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x71: /* shift mm, im */
        case 0x72:
        case 0x73:
        case 0x171: /* shift xmm, im */
        case 0x172:
        case 0x173:
            val = ldub_code(s->pc++);
            if (is_xmm) {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
                op1_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                op1_offset = offsetof(CPUX86State,mmx_t0);
            }
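            /* sse_op_table2 is indexed by the shift opcode (0x71..0x73,
               eight entries per row) plus the modrm reg field; column b1
               selects the MMX or SSE form of the shift */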
            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
            if (!sse_op2)
                goto illegal_op;
            if (is_xmm) {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
            break;
        case 0x050: /* movmskps */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x150: /* movmskpd */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x02a: /* cvtpi2ps */
        case 0x12a: /* cvtpi2pd */
            gen_helper_enter_mmx();
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            switch(b >> 8) {
            case 0x0:
                gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
                break;
            default:
            case 0x1:
                gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
                break;
            }
            break;
        case 0x22a: /* cvtsi2ss */
        case 0x32a: /* cvtsi2sd */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
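            /* sse_op_table3 entries 0..3: cvtsi2ss/cvtsi2sd with a 32 or
               64 bit integer source, selected by the repeat prefix and
               the operand size */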
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
            if (ot == OT_LONG) {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
            } else {
                ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
            }
            break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            gen_helper_enter_mmx();
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            switch(b) {
            case 0x02c:
                gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
                break;
            case 0x12c:
                gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
                break;
            case 0x02d:
                gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
                break;
            case 0x12d:
                gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
                break;
            }
            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                } else {
                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
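            /* sse_op_table3 entries 4..11: cvt(t)ss2si/cvt(t)sd2si,
               selected by the repeat prefix, truncation (b & 1 clear)
               and the 32 or 64 bit destination size */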
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
                                    (b & 1) * 4];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            if (ot == OT_LONG) {
                ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            } else {
                ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
            }
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            s->rip_offset = 1;
            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
            val = ldub_code(s->pc++);
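            /* the immediate selects the word lane: 0..7 in an xmm
               register, 0..3 in an mmx register */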
            if (b1) {
                val &= 7;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
            } else {
                val &= 3;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            if (mod != 3)
                goto illegal_op;
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            val = ldub_code(s->pc++);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0x1d6: /* movq ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            gen_helper_enter_mmx();
            rm = (modrm & 7);
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            gen_helper_enter_mmx();
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
            }
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x138:
            if (s->prefix & PREFIX_REPNZ)
                goto crc32;
        case 0x038:
            b = modrm;
            modrm = ldub_code(s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;

            sse_op2 = sse_op_table6[b].op[b1];
            if (!sse_op2)
                goto illegal_op;
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        gen_ldq_env_A0(s->mem_index, op2_offset +
                                        offsetof(XMMReg, XMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
                                          (s->mem_index >> 2) - 1);
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                        offsetof(XMMReg, XMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
                                          (s->mem_index >> 2) - 1);
                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                        offsetof(XMMReg, XMM_W(0)));
                        break;
                    case 0x2a:            /* movntdqa */
                        gen_ldo_env_A0(s->mem_index, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s->mem_index, op2_offset);
                    }
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldq_env_A0(s->mem_index, op2_offset);
                }
            }
            if (sse_op2 == SSE_SPECIAL)
                goto illegal_op;

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);

            if (b == 0x17)
                s->cc_op = CC_OP_EFLAGS;
            break;
        case 0x338: /* crc32 */
        crc32:
            b = modrm;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;

            if (b != 0xf0 && b != 0xf1)
                goto illegal_op;
            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
                goto illegal_op;

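            /* pick the source operand size: 0xf0 is the byte form,
               0xf1 uses the current operand size (16/32/64 bit) */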
            if (b == 0xf0)
                ot = OT_BYTE;
            else if (b == 0xf1 && s->dflag != 2)
                if (s->prefix & PREFIX_DATA)
                    ot = OT_WORD;
                else
                    ot = OT_LONG;
            else
                ot = OT_QUAD;

            gen_op_mov_TN_reg(OT_LONG, 0, reg);
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
            gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                             cpu_T[0], tcg_const_i32(8 << ot));

            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0x03a:
        case 0x13a:
            b = modrm;
            modrm = ldub_code(s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;

            sse_op2 = sse_op_table7[b].op[b1];
            if (!sse_op2)
                goto illegal_op;
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                goto illegal_op;

            if (sse_op2 == SSE_SPECIAL) {
                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3)
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                reg = ((modrm >> 3) & 7) | rex_r;
                val = ldub_code(s->pc++);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_B(val & 15)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_W(val & 7)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x16:
                    if (ot == OT_LONG) { /* pextrd */
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(val & 3)));
                        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                        if (mod == 3)
                            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
                        else
                            tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
                                            (s->mem_index >> 2) - 1);
                    } else { /* pextrq */
#ifdef TARGET_X86_64
                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_Q(val & 1)));
                        if (mod == 3)
                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
                        else
                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                            (s->mem_index >> 2) - 1);
#else
                        goto illegal_op;
#endif
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_L(val & 3)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3)
                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
                    else
                        tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    if (mod == 3) {
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .XMM_L((val >> 6) & 3)));
                    } else {
                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
                                        (s->mem_index >> 2) - 1);
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
                    }
                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .XMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(0)));
                    if ((val >> 1) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(1)));
                    if ((val >> 2) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(2)));
                    if ((val >> 3) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(3)));
                    break;
                case 0x22:
                    if (ot == OT_LONG) { /* pinsrd */
                        if (mod == 3)
                            gen_op_mov_v_reg(ot, cpu_tmp0, rm);
                        else
                            tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(val & 3)));
                    } else { /* pinsrq */
#ifdef TARGET_X86_64
                        if (mod == 3)
                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                        else
                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_Q(val & 1)));
#else
                        goto illegal_op;
#endif
                    }
                    break;
                }
                return;
            }

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldo_env_A0(s->mem_index, op2_offset);
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldq_env_A0(s->mem_index, op2_offset);
                }
            }
            val = ldub_code(s->pc++);

            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                s->cc_op = CC_OP_EFLAGS;

                if (s->dflag == 2)
                    /* The helper must use entire 64-bit gp registers */
                    val |= 1 << 8;
            }

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
        default:
            goto illegal_op;
        }
    } else {
        /* generic MMX or SSE operation */
        switch(b) {
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
        case 0xc2: /* compare insns */
            s->rip_offset = 1;
            break;
        default:
            break;
        }
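        /* set up op1 (the xmm/mmx register operand) and op2, which is
           either a register or the xmm_t0/mmx_t0 staging area loaded
           from memory */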
        if (is_xmm) {
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                b == 0xc2)) {
                    /* specific case for SSE single instructions */
                    if (b1 == 2) {
                        /* 32 bit access */
                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                        tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                    } else {
                        /* 64 bit access */
                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
                    }
                } else {
                    gen_ldo_env_A0(s->mem_index, op2_offset);
                }
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
        } else {
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
        }
        switch(b) {
        case 0x0f: /* 3DNow! data insns */
            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
                goto illegal_op;
            val = ldub_code(s->pc++);
            sse_op2 = sse_op_table5[val];
            if (!sse_op2)
                goto illegal_op;
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
            break;
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
            val = ldub_code(s->pc++);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
        case 0xc2:
            /* compare insns */
            val = ldub_code(s->pc++);
            if (val >= 8)
                goto illegal_op;
            sse_op2 = sse_op_table4[val][b1];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
            break;
        case 0xf7:
            /* maskmov: we must prepare A0 */
            if (mod != 3)
                goto illegal_op;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(R_EDI);
            } else
#endif
            {
                gen_op_movl_A0_reg(R_EDI);
                if (s->aflag == 0)
                    gen_op_andl_A0_ffff();
            }
            gen_add_A0_ds_seg(s);

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
            break;
        default:
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
            break;
        }
        if (b == 0x2e || b == 0x2f) {
            s->cc_op = CC_OP_EFLAGS;
        }
    }
}

/* convert one instruction. s->is_jmp is set if the translation must
   be stopped. Return the next pc value */
static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
{
    int b, prefixes, aflag, dflag;
    int shift, ot;
    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
    target_ulong next_eip, tval;
    int rex_w, rex_r;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
        tcg_gen_debug_insn_start(pc_start);
    s->pc = pc_start;
    prefixes = 0;
    aflag = s->code32;
    dflag = s->code32;
    s->override = -1;
    rex_w = -1;
    rex_r = 0;
#ifdef TARGET_X86_64
    s->rex_x = 0;
    s->rex_b = 0;
    x86_64_hregs = 0;
#endif
    s->rip_offset = 0; /* for relative ip address */
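    /* decode all prefix bytes: each case loops back to next_byte until
       the first opcode byte is reached */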
 next_byte:
    b = ldub_code(s->pc);
    s->pc++;
    /* check prefixes */
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        case 0x40 ... 0x4f:
            /* REX prefix */
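            /* the R, X and B bits are shifted up to bit 3 here so they
               can simply be OR'ed into the 3-bit modrm register fields
               later on */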
            rex_w = (b >> 3) & 1;
            rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            REX_B(s) = (b & 0x1) << 3;
            x86_64_hregs = 1; /* select uniform byte register addressing */
            goto next_byte;
        }
        if (rex_w == 1) {
            /* 0x66 is ignored if rex.w is set */
            dflag = 2;
        } else {
            if (prefixes & PREFIX_DATA)
                dflag ^= 1;
        }
        if (!(prefixes & PREFIX_ADR))
            aflag = 2;
    } else
#endif
    {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        }
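        /* outside long mode, 0x66 and 0x67 simply toggle the default
           operand and address size given by the code segment */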
        if (prefixes & PREFIX_DATA)
            dflag ^= 1;
        if (prefixes & PREFIX_ADR)
            aflag ^= 1;
    }

    s->prefix = prefixes;
    s->aflag = aflag;
    s->dflag = dflag;

    /* lock generation */
    if (prefixes & PREFIX_LOCK)
        gen_helper_lock();

    /* now check op code */
 reswitch:
    switch(b) {
    case 0x0f:
        /**************************/
        /* extended op code */
        b = ldub_code(s->pc++) | 0x100;
        goto reswitch;

        /**************************/
        /* arith & logic */
    case 0x00 ... 0x05:
    case 0x08 ... 0x0d:
    case 0x10 ... 0x15:
    case 0x18 ... 0x1d:
    case 0x20 ... 0x25:
    case 0x28 ... 0x2d:
    case 0x30 ... 0x35:
    case 0x38 ... 0x3d:
        {
            int op, f, val;
            op = (b >> 3) & 7;
            f = (b >> 1) & 3;
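            /* bits 5..3 of the opcode select the ALU operation (add, or,
               adc, sbb, and, sub, xor, cmp); bits 2..1 select the operand
               form handled by the switch below */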

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            switch(f) {
            case 0: /* OP Ev, Gv */
                modrm = ldub_code(s->pc++);
                reg = ((modrm >> 3) & 7) | rex_r;
                mod = (modrm >> 6) & 3;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    opreg = OR_TMP0;
                } else if (op == OP_XORL && rm == reg) {
                xor_zero:
                    /* xor reg, reg optimisation */
                    gen_op_movl_T0_0();
                    s->cc_op = CC_OP_LOGICB + ot;
                    gen_op_mov_reg_T0(ot, reg);
                    gen_op_update1_cc();
                    break;
                } else {
                    opreg = rm;
                }
                gen_op_mov_TN_reg(ot, 1, reg);
                gen_op(s, op, ot, opreg);
                break;
            case 1: /* OP Gv, Ev */
                modrm = ldub_code(s->pc++);
                mod = (modrm >> 6) & 3;
                reg = ((modrm >> 3) & 7) | rex_r;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_op_ld_T1_A0(ot + s->mem_index);
                } else if (op == OP_XORL && rm == reg) {
                    goto xor_zero;
                } else {
                    gen_op_mov_TN_reg(ot, 1, rm);
                }
                gen_op(s, op, ot, reg);
                break;
            case 2: /* OP A, Iv */
                val = insn_get(s, ot);
                gen_op_movl_T1_im(val);
                gen_op(s, op, ot, OR_EAX);
                break;
            }
        }
        break;

    case 0x82:
        if (CODE64(s))
            goto illegal_op;
    case 0x80: /* GRP1 */
    case 0x81:
    case 0x83:
        {
            int val;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            modrm = ldub_code(s->pc++);
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (b == 0x83)
                    s->rip_offset = 1;
                else
                    s->rip_offset = insn_const_size(ot);
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                opreg = OR_TMP0;
            } else {
                opreg = rm;
            }

            switch(b) {
            default:
            case 0x80:
            case 0x81:
            case 0x82:
                val = insn_get(s, ot);
                break;
            case 0x83:
                val = (int8_t)insn_get(s, OT_BYTE);
                break;
            }
            gen_op_movl_T1_im(val);
            gen_op(s, op, ot, opreg);
        }
        break;

        /**************************/
        /* inc, dec, and other misc arith */
    case 0x40 ... 0x47: /* inc Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), 1);
        break;
    case 0x48 ... 0x4f: /* dec Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), -1);
        break;
    case 0xf6: /* GRP3 */
    case 0xf7:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (mod != 3) {
            if (op == 0)
                s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

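        /* the modrm reg field selects the GRP3 operation: 0 = test,
           2 = not, 3 = neg, 4 = mul, 5 = imul, 6 = div, 7 = idiv */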
        switch(op) {
        case 0: /* test */
            val = insn_get(s, ot);
            gen_op_movl_T1_im(val);
            gen_op_testl_T0_T1_cc();
            s->cc_op = CC_OP_LOGICB + ot;
            break;
        case 2: /* not */
            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            break;
        case 3: /* neg */
            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            gen_op_update_neg_cc();
            s->cc_op = CC_OP_SUBB + ot;
            break;
        case 4: /* mul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
#else
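                /* target_ulong is only 32 bits wide here, so the
                   32x32->64 multiply is done in explicit i64 temps */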
                {
                    TCGv_i64 t0, t1;
                    t0 = tcg_temp_new_i64();
                    t1 = tcg_temp_new_i64();
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_helper_mulq_EAX_T0(cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
        case 5: /* imul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
#else
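                /* as for mul, widen to i64 for the signed product;
                   cc_src = high_part - sign_bits(low_part), non-zero
                   iff the result overflowed 32 bits */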
                {
                    TCGv_i64 t0, t1;
                    t0 = tcg_temp_new_i64();
                    t1 = tcg_temp_new_i64();
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_helper_imulq_EAX_T0(cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
        case 6: /* div */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_divb_AL(cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_divw_AX(cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_divl_EAX(cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_divq_EAX(cpu_T[0]);
                break;
#endif
            }
            break;
        case 7: /* idiv */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_idivb_AL(cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_idivw_AX(cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_idivl_EAX(cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                gen_helper_idivq_EAX(cpu_T[0]);
                break;
#endif
            }
            break;
        default:
            goto illegal_op;
        }
        break;

    case 0xfe: /* GRP4 */
    case 0xff: /* GRP5 */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (op >= 2 && b == 0xfe) {
            goto illegal_op;
        }
        if (CODE64(s)) {
            if (op == 2 || op == 4) {
                /* operand size for jumps is 64 bit */
                ot = OT_QUAD;
            } else if (op == 3 || op == 5) {
                /* for far calls and jumps, the operand is 16 or 32 bit,
                   even in long mode */
                ot = dflag ? OT_LONG : OT_WORD;
            } else if (op == 6) {
                /* default push size is 64 bit */
                ot = dflag ? OT_QUAD : OT_WORD;
            }
        }
        if (mod != 3) {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            if (op >= 2 && op != 3 && op != 5)
                gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

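        /* dispatch on the modrm reg field: 0 = inc, 1 = dec, 2 = call,
           3 = lcall, 4 = jmp, 5 = ljmp, 6 = push (GRP4 only allows
           inc and dec) */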
        switch(op) {
4572
        case 0: /* inc Ev */
4573
            if (mod != 3)
4574
                opreg = OR_TMP0;
4575
            else
4576
                opreg = rm;
4577
            gen_inc(s, ot, opreg, 1);
4578
            break;
4579
        case 1: /* dec Ev */
4580
            if (mod != 3)
4581
                opreg = OR_TMP0;
4582
            else
4583
                opreg = rm;
4584
            gen_inc(s, ot, opreg, -1);
4585
            break;
4586
        case 2: /* call Ev */
4587
            /* XXX: optimize if memory (no 'and' is necessary) */
4588
            if (s->dflag == 0)
4589
                gen_op_andl_T0_ffff();
4590
            next_eip = s->pc - s->cs_base;
4591
            gen_movtl_T1_im(next_eip);
4592
            gen_push_T1(s);
4593
            gen_op_jmp_T0();
4594
            gen_eob(s);
4595
            break;
4596
        case 3: /* lcall Ev */
4597
            gen_op_ld_T1_A0(ot + s->mem_index);
4598
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
4599
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
4600
        do_lcall:
4601
            if (s->pe && !s->vm86) {
4602
                if (s->cc_op != CC_OP_DYNAMIC)
4603
                    gen_op_set_cc_op(s->cc_op);
4604
                gen_jmp_im(pc_start - s->cs_base);
4605
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4606
                gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
4607
                                           tcg_const_i32(dflag), 
4608
                                           tcg_const_i32(s->pc - pc_start));
4609
            } else {
4610
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4611
                gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
4612
                                      tcg_const_i32(dflag), 
4613
                                      tcg_const_i32(s->pc - s->cs_base));
4614
            }
4615
            gen_eob(s);
4616
            break;
4617
        case 4: /* jmp Ev */
4618
            if (s->dflag == 0)
4619
                gen_op_andl_T0_ffff();
4620
            gen_op_jmp_T0();
4621
            gen_eob(s);
4622
            break;
4623
        case 5: /* ljmp Ev */
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        do_ljmp:
            if (s->pe && !s->vm86) {
                if (s->cc_op != CC_OP_DYNAMIC)
                    gen_op_set_cc_op(s->cc_op);
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
                                          tcg_const_i32(s->pc - pc_start));
            } else {
                gen_op_movl_seg_T0_vm(R_CS);
                gen_op_movl_T0_T1();
                gen_op_jmp_T0();
            }
            gen_eob(s);
            break;
        case 6: /* push Ev */
            gen_push_T0(s);
            break;
        default:
            goto illegal_op;
        }
        break;

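    /* TEST computes the AND of its two operands for the flags only;
       nothing is written back, so only the flag state (a logic op of
       the matching width) needs to be recorded */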
    case 0x84: /* test Ev, Gv */
    case 0x85:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_TN_reg(ot, 1, reg);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        val = insn_get(s, ot);

        gen_op_mov_TN_reg(ot, 0, OR_EAX);
        gen_op_movl_T1_im(val);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

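    /* CBW/CWDE (and CDQE with REX.W) sign-extend AL into AX, AX into
       EAX, or EAX into RAX, depending on the operand size */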
    case 0x98: /* CWDE/CBW */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_LONG, R_EAX);
        } else {
            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_WORD, R_EAX);
        }
        break;
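    /* CWD/CDQ (and CQO with REX.W) replicate the sign bit of the
       accumulator into DX/EDX/RDX via an arithmetic right shift by
       width - 1 */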
    case 0x99: /* CDQ/CWD */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
            gen_op_mov_reg_T0(OT_LONG, R_EDX);
        } else {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
            gen_op_mov_reg_T0(OT_WORD, R_EDX);
        }
        break;
    case 0x1af: /* imul Gv, Ev */
    case 0x69: /* imul Gv, Ev, I */
    case 0x6b:
        ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (b == 0x69)
            s->rip_offset = insn_const_size(ot);
        else if (b == 0x6b)
            s->rip_offset = 1;
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
        if (b == 0x69) {
            val = insn_get(s, ot);
            gen_op_movl_T1_im(val);
        } else if (b == 0x6b) {
            val = (int8_t)insn_get(s, OT_BYTE);
            gen_op_movl_T1_im(val);
        } else {
            gen_op_mov_TN_reg(ot, 1, reg);
        }

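        /* CF and OF must be set when the signed product does not fit
           in the destination: cc_src is computed so that it is
           non-zero exactly when the high part of the full product
           differs from the sign extension of the truncated result */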
#ifdef TARGET_X86_64
        if (ot == OT_QUAD) {
            gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else
#endif
        if (ot == OT_LONG) {
#ifdef TARGET_X86_64
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
#else
                {
                    TCGv_i64 t0, t1;
                    t0 = tcg_temp_new_i64();
                    t1 = tcg_temp_new_i64();
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
                }
#endif
        } else {
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
        }
        gen_op_mov_reg_T0(ot, reg);
        s->cc_op = CC_OP_MULB + ot;
        break;
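    /* XADD exchanges the source register with the destination and
       leaves the sum in the destination; the flags are those of the
       addition */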
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_TN_reg(ot, 1, rm);
            gen_op_addl_T0_T1();
            gen_op_mov_reg_T1(ot, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_op_addl_T0_T1();
            gen_op_st_T0_A0(ot + s->mem_index);
            gen_op_mov_reg_T1(ot, reg);
        }
        gen_op_update2_cc();
        s->cc_op = CC_OP_ADDB + ot;
        break;
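    /* CMPXCHG compares EAX with the destination: on a match the
       destination receives the source, otherwise EAX receives the
       destination; for a memory operand a store is always generated,
       matching the hardware write-back behaviour */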
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        {
            int label1, label2;
            TCGv t0, t1, t2, a0;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            a0 = tcg_temp_local_new();
            gen_op_mov_v_reg(ot, t1, reg);
            if (mod == 3) {
                rm = (modrm & 7) | REX_B(s);
                gen_op_mov_v_reg(ot, t0, rm);
            } else {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_mov_tl(a0, cpu_A0);
                gen_op_ld_v(ot + s->mem_index, t0, a0);
                rm = 0; /* avoid warning */
            }
            label1 = gen_new_label();
            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
            tcg_gen_sub_tl(t2, t2, t0);
            gen_extu(ot, t2);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
            if (mod == 3) {
                label2 = gen_new_label();
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                gen_op_mov_reg_v(ot, rm, t1);
                gen_set_label(label2);
            } else {
                tcg_gen_mov_tl(t1, t0);
                gen_op_mov_reg_v(ot, R_EAX, t0);
                gen_set_label(label1);
                /* always store */
                gen_op_st_v(ot + s->mem_index, t1, a0);
            }
            tcg_gen_mov_tl(cpu_cc_src, t0);
            tcg_gen_mov_tl(cpu_cc_dst, t2);
            s->cc_op = CC_OP_SUBB + ot;
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
            tcg_temp_free(a0);
        }
        break;
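    /* CMPXCHG8B/CMPXCHG16B only accept a memory operand with the /1
       opcode extension; availability is gated on the CX8/CX16 CPUID
       bits and the actual compare-exchange is done in a helper */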
    case 0x1c7: /* cmpxchg8b */
        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
            goto illegal_op;
#ifdef TARGET_X86_64
        if (dflag == 2) {
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                goto illegal_op;
            gen_jmp_im(pc_start - s->cs_base);
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_helper_cmpxchg16b(cpu_A0);
        } else
#endif
        {
            if (!(s->cpuid_features & CPUID_CX8))
                goto illegal_op;
            gen_jmp_im(pc_start - s->cs_base);
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_helper_cmpxchg8b(cpu_A0);
        }
        s->cc_op = CC_OP_EFLAGS;
        break;

        /**************************/
        /* push/pop */
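    /* in 64-bit mode the stack always uses a 64-bit default operand
       size: dflag selects OT_QUAD, and only a 0x66 prefix
       (dflag == 0) can reduce a push/pop to 16 bit */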
    case 0x50 ... 0x57: /* push */
        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
        gen_push_T0(s);
        break;
    case 0x58 ... 0x5f: /* pop */
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        gen_pop_T0(s);
        /* NOTE: order is important for pop %sp */
        gen_pop_update(s);
        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
        break;
    case 0x60: /* pusha */
        if (CODE64(s))
            goto illegal_op;
        gen_pusha(s);
        break;
    case 0x61: /* popa */
        if (CODE64(s))
            goto illegal_op;
        gen_popa(s);
        break;
    case 0x68: /* push Iv */
    case 0x6a:
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        if (b == 0x68)
            val = insn_get(s, ot);
        else
            val = (int8_t)insn_get(s, OT_BYTE);
        gen_op_movl_T0_im(val);
        gen_push_T0(s);
        break;
    case 0x8f: /* pop Ev */
        if (CODE64(s)) {