target-i386/translate.c @ efade670

/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>
#include <assert.h>

#include "cpu.h"
#include "exec-all.h"
#include "disas.h"
#include "helper.h"
#include "tcg-op.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10

#ifdef TARGET_X86_64
#define X86_64_ONLY(x) x
#define X86_64_DEF(x...) x
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
#if 1
#define BUGGY_64(x) NULL
#endif
#else
#define X86_64_ONLY(x) NULL
#define X86_64_DEF(x...)
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_env, cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
/* local temps */
static TCGv cpu_T[2], cpu_T3;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1;
static TCGv cpu_tmp5, cpu_tmp6;

#include "gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

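/* Decoding state for the translator.  The fields grouped under
   "current insn context" are refreshed for every instruction, while
   the "current block context" fields stay fixed while a single
   translation block is being built. */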
typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    int aflag, dflag;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int ss32;   /* 32 bit stack segment */
    int cc_op;  /* current CC operation */
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

/* operand size */
enum {
    OT_BYTE = 0,
    OT_WORD,
    OT_LONG,
    OT_QUAD,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

static inline void gen_op_movl_T0_0(void)
{
    tcg_gen_movi_tl(cpu_T[0], 0);
}

static inline void gen_op_movl_T0_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T0_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T1_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_T1_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_A0_im(uint32_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_im(int64_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}
#endif

static inline void gen_movtl_T0_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_movtl_T1_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_andl_T0_ffff(void)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
}

static inline void gen_op_andl_T0_im(uint32_t val)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
}

static inline void gen_op_movl_T0_T1(void)
{
    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
}

static inline void gen_op_andl_A0_ffff(void)
{
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

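/* In the byte case below, register numbers 4..7 select AH/CH/DH/BH
   unless a REX prefix was seen (tracked by x86_64_hregs), in which
   case they select the low byte of rSP/rBP/rSI/rDI instead, per the
   x86-64 register encoding rules. */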
static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
        } else {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    case OT_WORD:
        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case OT_QUAD:
        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}

static inline void gen_op_mov_reg_T0(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
}

static inline void gen_op_mov_reg_T1(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
}

static inline void gen_op_mov_reg_A0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case 2:
        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}

static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            goto std_case;
        } else {
            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    default:
    std_case:
        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
    }
}

static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
{
    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
}

static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif

static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}

static inline void gen_op_addl_T0_T1(void)
{
    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}

static inline void gen_op_jmp_T0(void)
{
    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
}

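/* The "size" argument below follows the aflag encoding: 0 means a
   16 bit register update, 1 means 32 bit (masked so the value wraps
   within the low half), 2 means the full 64 bit register. */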
static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_add_reg_T0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_set_cc_op(int32_t val)
{
    tcg_gen_movi_i32(cpu_cc_op, val);
}

static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
}

static inline void gen_op_addq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}

static inline void gen_op_movq_A0_reg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
}

static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
#endif

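/* Callers pass idx = ot + s->mem_index, so in the helpers below the
   low two bits carry the operand size (OT_BYTE..OT_QUAD) and the
   upper bits select the softmmu memory index used for the access. */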
static inline void gen_op_lds_T0_A0(int idx)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
        break;
    default:
    case 2:
        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
        break;
    }
}

static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8u(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16u(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_ld32u(t0, a0, mem_index);
        break;
    default:
    case 3:
        tcg_gen_qemu_ld64(t0, a0, mem_index);
        break;
    }
}

/* XXX: always use ldu or lds */
static inline void gen_op_ld_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ldu_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ld_T1_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_st8(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_st16(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_st32(t0, a0, mem_index);
        break;
    default:
    case 3:
        tcg_gen_qemu_st64(t0, a0, mem_index);
        break;
    }
}

static inline void gen_op_st_T0_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_st_T1_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
    } else
#endif
    if (s->aflag) {
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
    } else {
        /* 16 bit address: the segment base is always added */
        if (override < 0)
            override = R_DS;
        gen_op_movl_A0_reg(R_ESI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(override);
    }
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        gen_op_movq_A0_reg(R_EDI);
    } else
#endif
    if (s->aflag) {
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
    } else {
        gen_op_movl_A0_reg(R_EDI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(R_ES);
    }
}

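/* env->df holds the direction flag as +1 or -1; shifting it left by
   the operand size turns it into the per-iteration increment used by
   the string instructions (+/- 1, 2, 4 or 8). */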
static inline void gen_op_movl_T0_Dshift(int ot)
{
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
}

static void gen_extu(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8u_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16u_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32u_tl(reg, reg);
        break;
    default:
        break;
    }
}

static void gen_exts(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8s_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16s_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32s_tl(reg, reg);
        break;
    default:
        break;
    }
}

static inline void gen_op_jnz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void *helper_in_func[3] = {
    helper_inb,
    helper_inw,
    helper_inl,
};

static void *helper_out_func[3] = {
    helper_outb,
    helper_outw,
    helper_outl,
};

static void *gen_check_io_func[3] = {
    helper_check_iob,
    helper_check_iow,
    helper_check_iol,
};

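/* I/O instructions may need two checks before the access itself: the
   TSS I/O permission bitmap check when running protected code with
   CPL > IOPL (or in vm86 mode), and the SVM I/O intercept check when
   a hypervisor has intercepts enabled.  Both need EFLAGS and EIP to
   be up to date, hence the state_saved bookkeeping below. */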
static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    int state_saved;
    target_ulong next_eip;

    state_saved = 0;
    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        state_saved = 1;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_1(gen_check_io_func[ot],
                           cpu_tmp2_i32);
    }
    if (s->flags & HF_SVMI_MASK) {
        if (!state_saved) {
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_jmp_im(cur_eip);
            state_saved = 1;
        }
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_3(helper_svm_check_io,
                           cpu_tmp2_i32,
                           tcg_const_i32(svm_flags),
                           tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
}

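/* Lazy condition codes: instead of computing EFLAGS after every
   instruction, the translator records which operation last set the
   flags (cc_op) together with its operands/result (cc_src, cc_dst).
   The helpers below only refresh that record; the real flag values
   are materialized on demand by gen_compute_eflags*(). */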
static void gen_op_update1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static inline void gen_op_cmpl_T0_T1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

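/* cc_table is indexed by cc_op; each CCTable entry holds two helper
   function pointers, compute_all and compute_c.  The shift below is 3
   on 32 bit hosts and 4 on 64 bit hosts because an entry is two host
   pointers wide; the selected helper is then called indirectly. */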
/* compute eflags.C to reg */
static void gen_compute_eflags_c(TCGv reg)
{
#if TCG_TARGET_REG_BITS == 32
    tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3);
    tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32,
                     (long)cc_table + offsetof(CCTable, compute_c));
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#else
    tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op);
    tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4);
    tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64,
                     (long)cc_table + offsetof(CCTable, compute_c));
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#endif
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

/* compute all eflags to reg */
static void gen_compute_eflags(TCGv reg)
{
#if TCG_TARGET_REG_BITS == 32
    tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3);
    tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32,
                     (long)cc_table + offsetof(CCTable, compute_all));
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#else
    tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op);
    tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4);
    tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64,
                     (long)cc_table + offsetof(CCTable, compute_all));
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#endif
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    switch(jcc_op) {
    case JCC_O:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_B:
        gen_compute_eflags_c(cpu_T[0]);
        break;
    case JCC_Z:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_BE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_S:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_P:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_L:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    default:
    case JCC_LE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    }
}

/* return true if setcc_slow is not needed (WARNING: must be kept in
   sync with gen_jcc1) */
static int is_fast_jcc_case(DisasContext *s, int b)
{
    int jcc_op;
    jcc_op = (b >> 1) & 7;
    switch(s->cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:
        if (jcc_op == JCC_O || jcc_op == JCC_P)
            goto slow_jcc;
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:
        if (jcc_op != JCC_Z && jcc_op != JCC_S)
            goto slow_jcc;
        break;
    default:
    slow_jcc:
        return 0;
    }
    return 1;
}

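/* Fast path note: for the CC_OP_SUB* states, cc_dst holds the
   subtraction result and cc_src the second operand, so the first
   operand can be recovered as cc_dst + cc_src.  The unsigned
   (JCC_B/JCC_BE) and signed (JCC_L/JCC_LE) cases below compare that
   reconstructed operand directly against cc_src. */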
/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
{
    int inv, jcc_op, size, cond;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch(cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:

        size = cc_op - CC_OP_SUBB;
        switch(jcc_op) {
        case JCC_Z:
        fast_jcc_z:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
                t0 = cpu_tmp0;
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
                t0 = cpu_tmp0;
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
                t0 = cpu_tmp0;
                break;
#endif
            default:
                t0 = cpu_cc_dst;
                break;
            }
            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
            break;
        case JCC_S:
        fast_jcc_s:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#endif
            default:
                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
                                   0, l1);
                break;
            }
            break;

        case JCC_B:
            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
            goto fast_jcc_b;
        case JCC_BE:
            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
        fast_jcc_b:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        case JCC_L:
            cond = inv ? TCG_COND_GE : TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = inv ? TCG_COND_GT : TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext8s_tl(t0, cpu_cc_src);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext16s_tl(t0, cpu_cc_src);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext32s_tl(t0, cpu_cc_src);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        default:
            goto slow_jcc;
        }
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_ADCB:
    case CC_OP_ADCW:
    case CC_OP_ADCL:
    case CC_OP_ADCQ:

    case CC_OP_SBBB:
    case CC_OP_SBBW:
    case CC_OP_SBBL:
    case CC_OP_SBBQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:

    case CC_OP_SARB:
    case CC_OP_SARW:
    case CC_OP_SARL:
    case CC_OP_SARQ:
        switch(jcc_op) {
        case JCC_Z:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_z;
        case JCC_S:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_s;
        default:
            goto slow_jcc;
        }
        break;
    default:
    slow_jcc:
        gen_setcc_slow_T0(s, jcc_op);
        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
                           cpu_T[0], 0, l1);
        break;
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
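/* Emits "if (ECX != 0) goto l1; l2: jump to next_eip; l1:" and
   returns l2, so callers can later branch back to l2 when the string
   loop has to terminate. */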
static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    int l1, l2;

    l1 = gen_new_label();
    l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_op_mov_reg_T0(ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

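/* When instruction counting (icount) is enabled, device accesses must
   happen with the instruction counter in a determined state, so the
   actual port I/O is bracketed by gen_io_start()/gen_io_end(). */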
static inline void gen_ins(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    gen_op_movl_T0_0();
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[0], cpu_tmp2_i32);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    if (use_icount)
        gen_io_end();
}

static inline void gen_outs(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);

    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);

    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    if (use_icount)
        gen_io_end();
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
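/* Each REP iteration is translated as a single pass: the body runs
   once, ECX is decremented, and control jumps back to cur_eip so the
   instruction is re-entered through the normal fetch path.  This
   keeps interrupts and single-step exceptions precise between
   iterations. */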
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    int l2;                                                                   \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int l2;                                                                   \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

static void *helper_fp_arith_ST0_FT0[8] = {
    helper_fadd_ST0_FT0,
    helper_fmul_ST0_FT0,
    helper_fcom_ST0_FT0,
    helper_fcom_ST0_FT0,
    helper_fsub_ST0_FT0,
    helper_fsubr_ST0_FT0,
    helper_fdiv_ST0_FT0,
    helper_fdivr_ST0_FT0,
};

/* NOTE the exception in "r" op ordering */
static void *helper_fp_arith_STN_ST0[8] = {
    helper_fadd_STN_ST0,
    helper_fmul_STN_ST0,
    NULL,
    NULL,
    helper_fsubr_STN_ST0,
    helper_fsub_STN_ST0,
    helper_fdivr_STN_ST0,
    helper_fdiv_STN_ST0,
};

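/* For ADC and SBB the current carry must be folded in, so the flags
   are materialized first and the resulting cc_op is computed at run
   time: (carry << 2) added to CC_OP_ADDB/CC_OP_SUBB selects the
   ADC/SBB flavour, since each CC_OP group spans the four sizes. */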
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, int ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_TN_reg(ot, 0, d);
    } else {
        gen_op_ld_T0_A0(ot + s1->mem_index);
    }
    switch(op) {
    case OP_ADCL:
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_SBBL:
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_ADDL:
        gen_op_addl_T0_T1();
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();
        s1->cc_op = CC_OP_ADDB + ot;
        break;
    case OP_SUBL:
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_CMPL:
        gen_op_cmpl_T0_T1_cc();
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    }
}

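/* INC and DEC leave CF untouched, so the carry produced by the
   previous flag-setting operation is captured into cc_src (while
   cpu_cc_op still describes that operation at run time) and the
   CC_OP_INC/DEC regimes later read CF back from there. */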
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, int ot, int d, int c)
{
    if (d != OR_TMP0)
        gen_op_mov_TN_reg(ot, 0, d);
    else
        gen_op_ld_T0_A0(ot + s1->mem_index);
    if (s1->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s1->cc_op);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
        s1->cc_op = CC_OP_INCB + ot;
    } else {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
        s1->cc_op = CC_OP_DECB + ot;
    }
    if (d != OR_TMP0)
        gen_op_mov_reg_T0(ot, d);
    else
        gen_op_st_T0_A0(ot + s1->mem_index);
    gen_compute_eflags_c(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

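/* x86 masks the shift count to 5 bits (6 bits for 64 bit operands)
   and leaves EFLAGS unchanged when the masked count is zero, hence
   the run-time branch around the flag update below.  cpu_T3 receives
   the value shifted by count - 1 so the last bit shifted out (the new
   CF) stays recoverable for the flag computation. */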
static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask;
    int shift_label;
    TCGv t0, t1;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);
            tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags if non zero shift */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* XXX: inefficient */
    t0 = tcg_temp_local_new(TCG_TYPE_TL);
    t1 = tcg_temp_local_new(TCG_TYPE_TL);

    tcg_gen_mov_tl(t0, cpu_T[0]);
    tcg_gen_mov_tl(t1, cpu_T3);

    shift_label = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right)
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    else
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);

    gen_set_label(shift_label);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
}

static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T[0]);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                gen_extu(ot, cpu_T[0]);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
        }
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        if (is_right)
            s->cc_op = CC_OP_SARB + ot;
        else
            s->cc_op = CC_OP_SHLB + ot;
    }
}

static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
{
    if (arg2 >= 0)
        tcg_gen_shli_tl(ret, arg1, arg2);
    else
        tcg_gen_shri_tl(ret, arg1, -arg2);
}

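/* The rotate is built from two plain shifts OR'ed together, e.g. for
   a right rotate: (x >> n) | (x << (width - n)), on a value first
   zero-extended to the operand width.  After rotating, CF is taken
   from the bit that landed in the carry position and OF is derived
   from the XOR of the value before and after the rotate. */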
/* XXX: add faster immediate case */
static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
                          int is_right)
{
    target_ulong mask;
    int label1, label2, data_bits;
    TCGv t0, t1, t2, a0;

    /* XXX: inefficient, but we must use local temps */
    t0 = tcg_temp_local_new(TCG_TYPE_TL);
    t1 = tcg_temp_local_new(TCG_TYPE_TL);
    t2 = tcg_temp_local_new(TCG_TYPE_TL);
    a0 = tcg_temp_local_new(TCG_TYPE_TL);

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_mov_tl(t1, cpu_T[1]);

    tcg_gen_andi_tl(t1, t1, mask);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);

    if (ot <= OT_WORD)
        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
    else
        tcg_gen_mov_tl(cpu_tmp0, t1);

    gen_extu(ot, t0);
    tcg_gen_mov_tl(t2, t0);

    data_bits = 8 << ot;
    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
       fix TCG definition) */
    if (is_right) {
        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
    } else {
        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
    }
    tcg_gen_or_tl(t0, t0, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }

    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);

    gen_compute_eflags(cpu_cc_src);
    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
    if (is_right) {
        tcg_gen_shri_tl(t0, t0, data_bits - 1);
    }
    tcg_gen_andi_tl(t0, t0, CC_C);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);

    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);

    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}

static void *helper_rotc[8] = {
    helper_rclb,
    helper_rclw,
    helper_rcll,
    X86_64_ONLY(helper_rclq),
    helper_rcrb,
    helper_rcrw,
    helper_rcrl,
    X86_64_ONLY(helper_rcrq),
};

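/* Rotate through carry is delegated to the C helpers in helper_rotc[]
   because it folds the live CF into the rotation.  The helper reports
   back through cc_tmp: judging from the branch below, -1 means the
   count was zero and EFLAGS must stay unchanged. */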
/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
                           int is_right)
{
    int label1;

    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_helper_1_2(helper_rotc[ot + (is_right * 4)],
                       cpu_T[0], cpu_T[0], cpu_T[1]);
    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);

    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);

    gen_set_label(label1);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}

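/* SHLD/SHRD shift a double-width quantity built from the destination
   (t0) and the source register (t1); T3 carries the shift count.  For
   16 bit operands the Intel behaviour for counts above 16 is obtained
   by first replicating the 16 bit pattern into a 32 bit value. */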
/* XXX: add faster immediate case */
1706
static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, 
1707
                                int is_right)
1708
{
1709
    int label1, label2, data_bits;
1710
    target_ulong mask;
1711
    TCGv t0, t1, t2, a0;
1712

    
1713
    t0 = tcg_temp_local_new(TCG_TYPE_TL);
1714
    t1 = tcg_temp_local_new(TCG_TYPE_TL);
1715
    t2 = tcg_temp_local_new(TCG_TYPE_TL);
1716
    a0 = tcg_temp_local_new(TCG_TYPE_TL);
1717

    
1718
    if (ot == OT_QUAD)
1719
        mask = 0x3f;
1720
    else
1721
        mask = 0x1f;
1722

    
1723
    /* load */
1724
    if (op1 == OR_TMP0) {
1725
        tcg_gen_mov_tl(a0, cpu_A0);
1726
        gen_op_ld_v(ot + s->mem_index, t0, a0);
1727
    } else {
1728
        gen_op_mov_v_reg(ot, t0, op1);
1729
    }
1730

    
1731
    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
1732

    
1733
    tcg_gen_mov_tl(t1, cpu_T[1]);
1734
    tcg_gen_mov_tl(t2, cpu_T3);
1735

    
1736
    /* Must test zero case to avoid using undefined behaviour in TCG
1737
       shifts. */
1738
    label1 = gen_new_label();
1739
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
1740
    
1741
    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
1742
    if (ot == OT_WORD) {
1743
        /* Note: we implement the Intel behaviour for shift count > 16 */
1744
        if (is_right) {
1745
            tcg_gen_andi_tl(t0, t0, 0xffff);
1746
            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
1747
            tcg_gen_or_tl(t0, t0, cpu_tmp0);
1748
            tcg_gen_ext32u_tl(t0, t0);
1749

    
1750
            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
1751
            
1752
            /* only needed if count > 16, but a test would complicate */
1753
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
1754
            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
1755

    
1756
            tcg_gen_shr_tl(t0, t0, t2);
1757

    
1758
            tcg_gen_or_tl(t0, t0, cpu_tmp0);
1759
        } else {
1760
            /* XXX: not optimal */
1761
            tcg_gen_andi_tl(t0, t0, 0xffff);
1762
            tcg_gen_shli_tl(t1, t1, 16);
1763
            tcg_gen_or_tl(t1, t1, t0);
1764
            tcg_gen_ext32u_tl(t1, t1);
1765
            
1766
            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
1767
            tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
1768
            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
1769
            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);
1770

    
1771
            tcg_gen_shl_tl(t0, t0, t2);
1772
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
1773
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
1774
            tcg_gen_or_tl(t0, t0, t1);
1775
        }
1776
    } else {
1777
        data_bits = 8 << ot;
1778
        if (is_right) {
1779
            if (ot == OT_LONG)
1780
                tcg_gen_ext32u_tl(t0, t0);
1781

    
1782
            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
1783

    
1784
            tcg_gen_shr_tl(t0, t0, t2);
1785
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
1786
            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
1787
            tcg_gen_or_tl(t0, t0, t1);
1788
            
1789
        } else {
1790
            if (ot == OT_LONG)
1791
                tcg_gen_ext32u_tl(t1, t1);
1792

    
1793
            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
1794
            
1795
            tcg_gen_shl_tl(t0, t0, t2);
1796
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
1797
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
1798
            tcg_gen_or_tl(t0, t0, t1);
1799
        }
1800
    }
1801
    tcg_gen_mov_tl(t1, cpu_tmp4);
1802

    
1803
    gen_set_label(label1);
1804
    /* store */
1805
    if (op1 == OR_TMP0) {
1806
        gen_op_st_v(ot + s->mem_index, t0, a0);
1807
    } else {
1808
        gen_op_mov_reg_v(ot, op1, t0);
1809
    }
1810
    
1811
    /* update eflags */
1812
    if (s->cc_op != CC_OP_DYNAMIC)
1813
        gen_op_set_cc_op(s->cc_op);
1814

    
1815
    label2 = gen_new_label();
1816
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
1817

    
1818
    tcg_gen_mov_tl(cpu_cc_src, t1);
1819
    tcg_gen_mov_tl(cpu_cc_dst, t0);
1820
    if (is_right) {
1821
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
1822
    } else {
1823
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
1824
    }
1825
    gen_set_label(label2);
1826
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
1827

    
1828
    tcg_temp_free(t0);
1829
    tcg_temp_free(t1);
1830
    tcg_temp_free(t2);
1831
    tcg_temp_free(a0);
1832
}
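
/* Note: in the OT_WORD case above, the 16 bit operands are first
   composed into a 32 bit value (t0 | (t1 << 16) for SHRD) so that, as
   on Intel hardware, shift counts larger than 16 pull bits of the
   second operand back into the result instead of being undefined. */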

static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_TN_reg(ot, 1, s);
    switch(op) {
    case OP_ROL:
        gen_rot_rm_T1(s1, ot, d, 0);
        break;
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
        gen_rotc_rm_T1(s1, ot, d, 0);
        break;
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, 1);
        break;
    }
}

static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
{
    switch(op) {
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        gen_op_movl_T1_im(c);
        gen_shift(s1, op, ot, d, OR_TMP1);
        break;
    }
}

static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
{
    target_long disp;
    int havesib;
    int base;
    int index;
    int scale;
    int opreg;
    int mod, rm, code, override, must_add_seg;

    override = s->override;
    must_add_seg = s->addseg;
    if (override >= 0)
        must_add_seg = 1;
    mod = (modrm >> 6) & 3;
    rm = modrm & 7;

    if (s->aflag) {

        havesib = 0;
        base = rm;
        index = 0;
        scale = 0;

        if (base == 4) {
            havesib = 1;
            code = ldub_code(s->pc++);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            base = (code & 7);
        }
        base |= REX_B(s);

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                base = -1;
                disp = (int32_t)ldl_code(s->pc);
                s->pc += 4;
                if (CODE64(s) && !havesib) {
                    disp += s->pc + s->rip_offset;
                }
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = ldl_code(s->pc);
            s->pc += 4;
            break;
        }

        if (base >= 0) {
            /* for correct popl handling with esp */
            if (base == 4 && s->popl_esp_hack)
                disp += s->popl_esp_hack;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(base);
                if (disp != 0) {
                    gen_op_addq_A0_im(disp);
                }
            } else
#endif
            {
                gen_op_movl_A0_reg(base);
                if (disp != 0)
                    gen_op_addl_A0_im(disp);
            }
        } else {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_im(disp);
            } else
#endif
            {
                gen_op_movl_A0_im(disp);
            }
        }
        /* XXX: index == 4 is always invalid */
        if (havesib && (index != 4 || scale != 0)) {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_reg_sN(scale, index);
            } else
#endif
            {
                gen_op_addl_A0_reg_sN(scale, index);
            }
        }
        if (must_add_seg) {
            if (override < 0) {
                if (base == R_EBP || base == R_ESP)
                    override = R_SS;
                else
                    override = R_DS;
            }
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_seg(override);
            } else
#endif
            {
                gen_op_addl_A0_seg(override);
            }
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                disp = lduw_code(s->pc);
                s->pc += 2;
                gen_op_movl_A0_im(disp);
                rm = 0; /* avoid SS override */
                goto no_rm;
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = lduw_code(s->pc);
            s->pc += 2;
            break;
        }
        switch(rm) {
        case 0:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 1:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 2:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 3:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 4:
            gen_op_movl_A0_reg(R_ESI);
            break;
        case 5:
            gen_op_movl_A0_reg(R_EDI);
            break;
        case 6:
            gen_op_movl_A0_reg(R_EBP);
            break;
        default:
        case 7:
            gen_op_movl_A0_reg(R_EBX);
            break;
        }
        if (disp != 0)
            gen_op_addl_A0_im(disp);
        gen_op_andl_A0_ffff();
    no_rm:
        if (must_add_seg) {
            if (override < 0) {
                if (rm == 2 || rm == 3 || rm == 6)
                    override = R_SS;
                else
                    override = R_DS;
            }
            gen_op_addl_A0_seg(override);
        }
    }

    opreg = OR_A0;
    disp = 0;
    *reg_ptr = opreg;
    *offset_ptr = disp;
}
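
/* Illustrative example: for the instruction bytes "8b 44 9e 08"
   (mov eax, [esi + ebx*4 + 8]), the modrm byte 0x44 gives mod=1,
   reg=0 (EAX) and rm=4, so a SIB byte follows; the SIB byte 0x9e gives
   scale=2, index=3 (EBX) and base=6 (ESI), and mod=1 supplies the 8 bit
   displacement 0x08. */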

static void gen_nop_modrm(DisasContext *s, int modrm)
{
    int mod, rm, base, code;

    mod = (modrm >> 6) & 3;
    if (mod == 3)
        return;
    rm = modrm & 7;

    if (s->aflag) {

        base = rm;

        if (base == 4) {
            code = ldub_code(s->pc++);
            base = (code & 7);
        }

        switch (mod) {
        case 0:
            if (base == 5) {
                s->pc += 4;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 4;
            break;
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                s->pc += 2;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 2;
            break;
        }
    }
}

/* used for LEA and MOV AX, mem */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    int override, must_add_seg;
    must_add_seg = s->addseg;
    override = R_DS;
    if (s->override >= 0) {
        override = s->override;
        must_add_seg = 1;
    }
    if (must_add_seg) {
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            gen_op_addq_A0_seg(override);
        } else
#endif
        {
            gen_op_addl_A0_seg(override);
        }
    }
}

/* generate modrm memory load or store of 'reg'. TMP0 is used if reg !=
   OR_TMP0 */
static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
{
    int mod, rm, opreg, disp;

    mod = (modrm >> 6) & 3;
    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    } else {
        gen_lea_modrm(s, modrm, &opreg, &disp);
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_st_T0_A0(ot + s->mem_index);
        } else {
            gen_op_ld_T0_A0(ot + s->mem_index);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    }
}

static inline uint32_t insn_get(DisasContext *s, int ot)
{
    uint32_t ret;

    switch(ot) {
    case OT_BYTE:
        ret = ldub_code(s->pc);
        s->pc++;
        break;
    case OT_WORD:
        ret = lduw_code(s->pc);
        s->pc += 2;
        break;
    default:
    case OT_LONG:
        ret = ldl_code(s->pc);
        s->pc += 4;
        break;
    }
    return ret;
}

static inline int insn_const_size(unsigned int ot)
{
    if (ot <= OT_LONG)
        return 1 << ot;
    else
        return 4;
}

static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    TranslationBlock *tb;
    target_ulong pc;

    pc = s->cs_base + eip;
    tb = s->tb;
    /* NOTE: we handle the case where the TB spans two pages here */
    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(eip);
        tcg_gen_exit_tb((long)tb + tb_num);
    } else {
        /* jump to another page: currently not optimized */
        gen_jmp_im(eip);
        gen_eob(s);
    }
}
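
/* Note: tb_num (0 or 1) is encoded in the low bits of the value passed
   to tcg_gen_exit_tb(), so the execution loop can tell which of the two
   direct-jump slots of the TB to chain. */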

static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    int l1, l2, cc_op;

    cc_op = s->cc_op;
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
        s->is_jmp = 3;
    } else {

        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_jmp_im(next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(val);
        gen_set_label(l2);
        gen_eob(s);
    }
}

static void gen_setcc(DisasContext *s, int b)
{
    int inv, jcc_op, l1;
    TCGv t0;

    if (is_fast_jcc_case(s, b)) {
        /* nominal case: we use a jump */
        /* XXX: make it faster by adding new instructions in TCG */
        t0 = tcg_temp_local_new(TCG_TYPE_TL);
        tcg_gen_movi_tl(t0, 0);
        l1 = gen_new_label();
        gen_jcc1(s, s->cc_op, b ^ 1, l1);
        tcg_gen_movi_tl(t0, 1);
        gen_set_label(l1);
        tcg_gen_mov_tl(cpu_T[0], t0);
        tcg_temp_free(t0);
    } else {
        /* slow case: it is more efficient not to generate a jump,
           although it is questionable whether this optimization is
           worthwhile */
        inv = b & 1;
        jcc_op = (b >> 1) & 7;
        gen_setcc_slow_T0(s, jcc_op);
        if (inv) {
            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
        }
    }
}

static inline void gen_op_movl_T0_seg(int seg_reg)
{
    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}

static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
    tcg_gen_st32_tl(cpu_T[0], cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
    tcg_gen_st_tl(cpu_T[0], cpu_env,
                  offsetof(CPUX86State,segs[seg_reg].base));
}

/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
{
    if (s->pe && !s->vm86) {
        /* XXX: optimize by finding processor state dynamically */
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_2(helper_load_seg, tcg_const_i32(seg_reg), cpu_tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
            s->is_jmp = 3;
    } else {
        gen_op_movl_seg_T0_vm(seg_reg);
        if (seg_reg == R_SS)
            s->is_jmp = 3;
    }
}

static inline int svm_is_rep(int prefixes)
{
    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
}
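
/* Note: the returned value 8 presumably matches the REP bit of the SVM
   IOIO intercept exit information, so it can be OR'ed directly into the
   intercept parameter. */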

static inline void
gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
                              uint32_t type, uint64_t param)
{
    /* no SVM activated; fast case */
    if (likely(!(s->flags & HF_SVMI_MASK)))
        return;
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(pc_start - s->cs_base);
    tcg_gen_helper_0_2(helper_svm_check_intercept_param,
                       tcg_const_i32(type), tcg_const_i64(param));
}

static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}

static inline void gen_stack_update(DisasContext *s, int addend)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_add_reg_im(2, R_ESP, addend);
    } else
#endif
    if (s->ss32) {
        gen_op_add_reg_im(1, R_ESP, addend);
    } else {
        gen_op_add_reg_im(0, R_ESP, addend);
    }
}

/* generate a push. It depends on ss32, addseg and dflag */
static void gen_push_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T0_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                tcg_gen_mov_tl(cpu_T[1], cpu_A0);
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            tcg_gen_mov_tl(cpu_T[1], cpu_A0);
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
    }
}
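
/* Note: in the 32 bit path of gen_push_T0() above, s->dflag is 0 for 16
   bit and 1 for 32 bit operands, so "s->dflag + 1" maps to OT_WORD or
   OT_LONG for the store, and "s->ss32 + 1" likewise selects the width
   of the ESP writeback. */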

/* generate a push. It depends on ss32, addseg and dflag */
/* slower version for T1, only used for call Ev */
static void gen_push_T1(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T1_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);

        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_stack_update(s, (-2) << s->dflag);
    }
}

/* two step pop is necessary for precise exceptions */
static void gen_pop_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (s->ss32) {
            if (s->addseg)
                gen_op_addl_A0_seg(R_SS);
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
    }
}

static void gen_pop_update(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s) && s->dflag) {
        gen_stack_update(s, 8);
    } else
#endif
    {
        gen_stack_update(s, 2 << s->dflag);
    }
}

static void gen_stack_A0(DisasContext *s)
{
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_pusha(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    gen_op_addl_A0_im(-16 << s->dflag);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0; i < 8; i++) {
        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
        gen_op_addl_A0_im(2 << s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_popa(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0; i < 8; i++) {
        /* ESP is not reloaded */
        if (i != 3) {
            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
        }
        gen_op_addl_A0_im(2 << s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    int ot, opsize;

    level &= 0x1f;
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        ot = s->dflag ? OT_QUAD : OT_WORD;
        opsize = 1 << ot;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addq_A0_im(-opsize);
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);

        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            tcg_gen_helper_0_3(helper_enter64_level,
                               tcg_const_i32(level),
                               tcg_const_i32((ot == OT_QUAD)),
                               cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
    } else
#endif
    {
        ot = s->dflag + OT_WORD;
        opsize = 2 << s->dflag;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addl_A0_im(-opsize);
        if (!s->ss32)
            gen_op_andl_A0_ffff();
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
        if (s->addseg)
            gen_op_addl_A0_seg(R_SS);
        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            tcg_gen_helper_0_3(helper_enter_level,
                               tcg_const_i32(level),
                               tcg_const_i32(s->dflag),
                               cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
    }
}
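
/* Note: gen_enter() implements ENTER: EBP is pushed, the helper copies
   up to 31 outer frame pointers when level is non zero, EBP is then
   loaded with the new frame pointer and ESP is lowered by esp_addend
   plus the space taken by the copied frame pointers. */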

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_1(helper_raise_exception, tcg_const_i32(trapno));
    s->is_jmp = 3;
}

/* an interrupt is different from an exception because of the
   privilege checks */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_2(helper_raise_interrupt,
                       tcg_const_i32(intno),
                       tcg_const_i32(next_eip - cur_eip));
    s->is_jmp = 3;
}

static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_0(helper_debug);
    s->is_jmp = 3;
}

/* generate a generic end of block. Trace exception is also generated
   if needed */
static void gen_eob(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
        tcg_gen_helper_0_0(helper_reset_inhibit_irq);
    }
    if (s->singlestep_enabled) {
        tcg_gen_helper_0_0(helper_debug);
    } else if (s->tf) {
        tcg_gen_helper_0_0(helper_single_step);
    } else {
        tcg_gen_exit_tb(0);
    }
    s->is_jmp = 3;
}

/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    if (s->jmp_opt) {
        if (s->cc_op != CC_OP_DYNAMIC) {
            gen_op_set_cc_op(s->cc_op);
            s->cc_op = CC_OP_DYNAMIC;
        }
        gen_goto_tb(s, tb_num, eip);
        s->is_jmp = 3;
    } else {
        gen_jmp_im(eip);
        gen_eob(s);
    }
}

static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}

static inline void gen_ldq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
}

static inline void gen_stq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
}

static inline void gen_ldo_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
}

static inline void gen_sto_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
}

static inline void gen_op_movo(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
}

static inline void gen_op_movq(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

static inline void gen_op_movl(int d_offset, int s_offset)
{
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
}

static inline void gen_op_movq_env_0(int d_offset)
{
    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

#define MMX_OP2(x) { helper_ ## x ## _mmx, helper_ ## x ## _xmm }
#define SSE_FOP(x) { helper_ ## x ## ps, helper_ ## x ## pd, \
                     helper_ ## x ## ss, helper_ ## x ## sd, }

static void *sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { helper_punpckldq_xmm, helper_punpcklqdq_xmm },
    [0x15] = { helper_punpckhdq_xmm, helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
    [0x2e] = { helper_ucomiss, helper_ucomisd },
    [0x2f] = { helper_comiss, helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { helper_rsqrtps, NULL, helper_rsqrtss, NULL },
    [0x53] = { helper_rcpps, NULL, helper_rcpss, NULL },
    [0x54] = { helper_pand_xmm, helper_pand_xmm }, /* andps, andpd */
    [0x55] = { helper_pandn_xmm, helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { helper_por_xmm, helper_por_xmm }, /* orps, orpd */
    [0x57] = { helper_pxor_xmm, helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { helper_cvtps2pd, helper_cvtpd2ps,
               helper_cvtss2sd, helper_cvtsd2ss },
    [0x5b] = { helper_cvtdq2ps, helper_cvtps2dq, helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { helper_shufps, helper_shufpd },

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, helper_punpcklqdq_xmm },
    [0x6d] = { NULL, helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0x70] = { helper_pshufw_mmx,
               helper_pshufd_xmm,
               helper_pshufhw_xmm,
               helper_pshuflw_xmm },
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x7c] = { NULL, helper_haddpd, NULL, helper_haddps },
    [0x7d] = { NULL, helper_hsubpd, NULL, helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, helper_addsubpd, NULL, helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, helper_cvttpd2dq, helper_cvtdq2pd, helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = MMX_OP2(maskmov),
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};
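
/* Note: the second index of sse_op_table1 selects the instruction
   variant by mandatory prefix, matching b1 in gen_sse(): 0 = no prefix,
   1 = 0x66, 2 = 0xF3, 3 = 0xF2. */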

static void *sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, helper_psrldq_xmm },
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, helper_pslldq_xmm },
};
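
/* Note: sse_op_table2 is indexed with ((b - 1) & 3) * 8 + reg, i.e. 8
   slots for each of the 0x71/0x72/0x73 opcode groups selected by the
   reg field of the modrm byte; psrldq/pslldq only exist in the xmm
   column. */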

static void *sse_op_table3[4 * 3] = {
    helper_cvtsi2ss,
    helper_cvtsi2sd,
    X86_64_ONLY(helper_cvtsq2ss),
    X86_64_ONLY(helper_cvtsq2sd),

    helper_cvttss2si,
    helper_cvttsd2si,
    X86_64_ONLY(helper_cvttss2sq),
    X86_64_ONLY(helper_cvttsd2sq),

    helper_cvtss2si,
    helper_cvtsd2si,
    X86_64_ONLY(helper_cvtss2sq),
    X86_64_ONLY(helper_cvtsd2sq),
};

static void *sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};

static void *sse_op_table5[256] = {
    [0x0c] = helper_pi2fw,
    [0x0d] = helper_pi2fd,
    [0x1c] = helper_pf2iw,
    [0x1d] = helper_pf2id,
    [0x8a] = helper_pfnacc,
    [0x8e] = helper_pfpnacc,
    [0x90] = helper_pfcmpge,
    [0x94] = helper_pfmin,
    [0x96] = helper_pfrcp,
    [0x97] = helper_pfrsqrt,
    [0x9a] = helper_pfsub,
    [0x9e] = helper_pfadd,
    [0xa0] = helper_pfcmpgt,
    [0xa4] = helper_pfmax,
    [0xa6] = helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = helper_movq, /* pfrsqit1 */
    [0xaa] = helper_pfsubr,
    [0xae] = helper_pfacc,
    [0xb0] = helper_pfcmpeq,
    [0xb4] = helper_pfmul,
    [0xb6] = helper_movq, /* pfrcpit2 */
    [0xb7] = helper_pmulhrw_mmx,
    [0xbb] = helper_pswapd,
    [0xbf] = helper_pavgb_mmx /* pavgusb */
};

static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
{
    int b1, op1_offset, op2_offset, is_xmm, val, ot;
    int modrm, mod, rm, reg, reg_addr, offset_addr;
    void *sse_op2;

    b &= 0xff;
    if (s->prefix & PREFIX_DATA)
        b1 = 1;
    else if (s->prefix & PREFIX_REPZ)
        b1 = 2;
    else if (s->prefix & PREFIX_REPNZ)
        b1 = 3;
    else
        b1 = 0;
    sse_op2 = sse_op_table1[b][b1];
    if (!sse_op2)
        goto illegal_op;
    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
        is_xmm = 1;
    } else {
        if (b1 == 0) {
            /* MMX case */
            is_xmm = 0;
        } else {
            is_xmm = 1;
        }
    }
    /* simple MMX/SSE operation */
    if (s->flags & HF_TS_MASK) {
        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
        return;
    }
    if (s->flags & HF_EM_MASK) {
    illegal_op:
        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
        return;
    }
    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
        goto illegal_op;
    if (b == 0x0e) {
        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
            goto illegal_op;
        /* femms */
        tcg_gen_helper_0_0(helper_emms);
        return;
    }
    if (b == 0x77) {
        /* emms */
        tcg_gen_helper_0_0(helper_emms);
        return;
    }
    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
       the static cpu state) */
    if (!is_xmm) {
        tcg_gen_helper_0_0(helper_enter_mmx);
    }

    modrm = ldub_code(s->pc++);
    reg = ((modrm >> 3) & 7);
    if (is_xmm)
        reg |= rex_r;
    mod = (modrm >> 6) & 3;
    if (sse_op2 == SSE_SPECIAL) {
        b |= (b1 << 8);
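        /* Note: the prefix index b1 is folded into bits 8..9 of b, so a
           case label such as 0x3f0 below means opcode 0x0f 0xf0 with
           the 0xF2 prefix. */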
        switch(b) {
        case 0x0e7: /* movntq */
            if (mod == 3)
                goto illegal_op;
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
            break;
        case 0x1e7: /* movntdq */
        case 0x02b: /* movntps */
        case 0x12b: /* movntpd */
            if (mod == 3)
                goto illegal_op;
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            break;
        case 0x3f0: /* lddqu */
            if (mod == 3)
                goto illegal_op;
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            break;
        case 0x6e: /* movd mm, ea */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
            } else
#endif
            {
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx));
                tcg_gen_helper_0_2(helper_movl_mm_T0_mmx, cpu_ptr0, cpu_T[0]);
            }
            break;
        case 0x16e: /* movd xmm, ea */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg]));
                tcg_gen_helper_0_2(helper_movq_mm_T0_xmm, cpu_ptr0, cpu_T[0]);
            } else
#endif
            {
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg]));
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_2(helper_movl_mm_T0_xmm, cpu_ptr0, cpu_tmp2_i32);
            }
            break;
        case 0x6f: /* movq mm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
            } else {
                rm = (modrm & 7);
                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                               offsetof(CPUX86State,fpregs[rm].mmx));
                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                               offsetof(CPUX86State,fpregs[reg].mmx));
            }
            break;
        case 0x010: /* movups */
        case 0x110: /* movupd */
        case 0x028: /* movaps */
        case 0x128: /* movapd */
        case 0x16f: /* movdqa xmm, ea */
        case 0x26f: /* movdqu xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
                            offsetof(CPUX86State,xmm_regs[rm]));
            }
            break;
        case 0x210: /* movss xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
            }
            break;
        case 0x310: /* movsd xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            }
            break;
        case 0x012: /* movlps */
        case 0x112: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                /* movhlps */
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
            }
            break;
        case 0x212: /* movsldup */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
            }
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
            break;
        case 0x312: /* movddup */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            }
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            break;
        case 0x016: /* movhps */
        case 0x116: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            } else {
                /* movlhps */
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            }
            break;
        case 0x216: /* movshdup */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
            }
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
            break;
        case 0x7e: /* movd ea, mm */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                               offsetof(CPUX86State,fpregs[reg].mmx));
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
            }
            break;
        case 0x17e: /* movd ea, xmm */
#ifdef TARGET_X86_64
            if (s->dflag == 2) {
                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
            } else
#endif
            {
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
            }
            break;
        case 0x27e: /* movq xmm, ea */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            }
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            break;
        case 0x7f: /* movq ea, mm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
            } else {
                rm = (modrm & 7);
                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
                            offsetof(CPUX86State,fpregs[reg].mmx));
            }
            break;
        case 0x011: /* movups */
        case 0x111: /* movupd */
        case 0x029: /* movaps */
        case 0x129: /* movapd */
        case 0x17f: /* movdqa ea, xmm */
        case 0x27f: /* movdqu ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
                            offsetof(CPUX86State,xmm_regs[reg]));
            }
            break;
        case 0x211: /* movss ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_op_st_T0_A0(OT_LONG + s->mem_index);
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
            }
            break;
        case 0x311: /* movsd ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            }
            break;
        case 0x013: /* movlps */
        case 0x113: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x017: /* movhps */
        case 0x117: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x71: /* shift mm, im */
        case 0x72:
        case 0x73:
        case 0x171: /* shift xmm, im */
        case 0x172:
        case 0x173:
            val = ldub_code(s->pc++);
            if (is_xmm) {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
                op1_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                op1_offset = offsetof(CPUX86State,mmx_t0);
            }
            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
            if (!sse_op2)
                goto illegal_op;
            if (is_xmm) {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        case 0x050: /* movmskps */
3297
            rm = (modrm & 7) | REX_B(s);
3298
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3299
                             offsetof(CPUX86State,xmm_regs[rm]));
3300
            tcg_gen_helper_1_1(helper_movmskps, cpu_tmp2_i32, cpu_ptr0);
3301
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3302
            gen_op_mov_reg_T0(OT_LONG, reg);
3303
            break;
3304
        case 0x150: /* movmskpd */
3305
            rm = (modrm & 7) | REX_B(s);
3306
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3307
                             offsetof(CPUX86State,xmm_regs[rm]));
3308
            tcg_gen_helper_1_1(helper_movmskpd, cpu_tmp2_i32, cpu_ptr0);
3309
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3310
            gen_op_mov_reg_T0(OT_LONG, reg);
3311
            break;
3312
        case 0x02a: /* cvtpi2ps */
3313
        case 0x12a: /* cvtpi2pd */
3314
            tcg_gen_helper_0_0(helper_enter_mmx);
3315
            if (mod != 3) {
3316
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3317
                op2_offset = offsetof(CPUX86State,mmx_t0);
3318
                gen_ldq_env_A0(s->mem_index, op2_offset);
3319
            } else {
3320
                rm = (modrm & 7);
3321
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3322
            }
3323
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3324
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3325
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3326
            switch(b >> 8) {
3327
            case 0x0:
3328
                tcg_gen_helper_0_2(helper_cvtpi2ps, cpu_ptr0, cpu_ptr1);
3329
                break;
3330
            default:
3331
            case 0x1:
3332
                tcg_gen_helper_0_2(helper_cvtpi2pd, cpu_ptr0, cpu_ptr1);
3333
                break;
3334
            }
3335
            break;
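        /* Scalar integer to float conversions take their source from
           a general register or memory via gen_ldst_modrm; REX.W
           (dflag == 2) selects the 64-bit input variant in
           sse_op_table3. */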
        case 0x22a: /* cvtsi2ss */
        case 0x32a: /* cvtsi2sd */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_tmp2_i32);
            break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            tcg_gen_helper_0_0(helper_enter_mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            switch(b) {
            case 0x02c:
                tcg_gen_helper_0_2(helper_cvttps2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x12c:
                tcg_gen_helper_0_2(helper_cvttpd2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x02d:
                tcg_gen_helper_0_2(helper_cvtps2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x12d:
                tcg_gen_helper_0_2(helper_cvtpd2pi, cpu_ptr0, cpu_ptr1);
                break;
            }
            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                } else {
                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
                                    (b & 1) * 4];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            if (ot == OT_LONG) {
                tcg_gen_helper_1_1(sse_op2, cpu_tmp2_i32, cpu_ptr0);
                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            } else {
                tcg_gen_helper_1_1(sse_op2, cpu_T[0], cpu_ptr0);
            }
            gen_op_mov_reg_T0(ot, reg);
            break;
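        /* pinsrw inserts a 16-bit value at an immediate-selected word
           position; the index is masked to 0-3 for MMX and 0-7 for
           XMM destinations. */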
        case 0xc4: /* pinsrw */
        case 0x1c4:
            s->rip_offset = 1;
            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
            val = ldub_code(s->pc++);
            if (b1) {
                val &= 7;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
            } else {
                val &= 3;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            if (mod != 3)
                goto illegal_op;
            val = ldub_code(s->pc++);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x1d6: /* movq ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            tcg_gen_helper_0_0(helper_enter_mmx);
            rm = (modrm & 7);
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            tcg_gen_helper_0_0(helper_enter_mmx);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
                tcg_gen_helper_1_1(helper_pmovmskb_xmm, cpu_tmp2_i32, cpu_ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
                tcg_gen_helper_1_1(helper_pmovmskb_mmx, cpu_tmp2_i32, cpu_ptr0);
            }
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        default:
            goto illegal_op;
        }
    } else {
        /* generic MMX or SSE operation */
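        /* op1 is always the destination register; op2 comes from a
           register or is staged through xmm_t0/mmx_t0 when it is in
           memory.  Scalar SSE forms only load the 32 or 64 bits they
           actually use. */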
        switch(b) {
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
        case 0xc2: /* compare insns */
            s->rip_offset = 1;
            break;
        default:
            break;
        }
        if (is_xmm) {
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                b == 0xc2)) {
                    /* specific case for SSE single instructions */
                    if (b1 == 2) {
                        /* 32 bit access */
                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                        tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                    } else {
                        /* 64 bit access */
                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
                    }
                } else {
                    gen_ldo_env_A0(s->mem_index, op2_offset);
                }
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
        } else {
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
        }
        switch(b) {
        case 0x0f: /* 3DNow! data insns */
            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
                goto illegal_op;
            val = ldub_code(s->pc++);
            sse_op2 = sse_op_table5[val];
            if (!sse_op2)
                goto illegal_op;
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
            val = ldub_code(s->pc++);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
        case 0xc2:
            /* compare insns */
            val = ldub_code(s->pc++);
            if (val >= 8)
                goto illegal_op;
            sse_op2 = sse_op_table4[val][b1];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
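        /* maskmovq/maskmovdqu store through DS:rDI, so the effective
           address must be computed into A0 up front and passed to the
           helper as a third argument. */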
        case 0xf7:
            /* maskmov : we must prepare A0 */
            if (mod != 3)
                goto illegal_op;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(R_EDI);
            } else
#endif
            {
                gen_op_movl_A0_reg(R_EDI);
                if (s->aflag == 0)
                    gen_op_andl_A0_ffff();
            }
            gen_add_A0_ds_seg(s);

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, cpu_A0);
            break;
        default:
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        }
        if (b == 0x2e || b == 0x2f) {
            s->cc_op = CC_OP_EFLAGS;
        }
    }
}

/* convert one instruction. s->is_jmp is set if the translation must
   be stopped. Return the next pc value */
static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
{
    int b, prefixes, aflag, dflag;
    int shift, ot;
    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
    target_ulong next_eip, tval;
    int rex_w, rex_r;

    if (unlikely(loglevel & CPU_LOG_TB_OP))
        tcg_gen_debug_insn_start(pc_start);
    s->pc = pc_start;
    prefixes = 0;
    aflag = s->code32;
    dflag = s->code32;
    s->override = -1;
    rex_w = -1;
    rex_r = 0;
#ifdef TARGET_X86_64
    s->rex_x = 0;
    s->rex_b = 0;
    x86_64_hregs = 0;
#endif
    s->rip_offset = 0; /* for relative ip address */
 next_byte:
    b = ldub_code(s->pc);
    s->pc++;
    /* check prefixes */
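    /* Each prefix byte loops back to next_byte; the 64-bit decoder
       additionally recognizes REX and applies the long-mode rules for
       the default operand (32-bit) and address (64-bit) sizes. */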
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        case 0x40 ... 0x4f:
            /* REX prefix */
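            /* REX is 0100WRXB: W widens the operand size to 64 bits,
               while R, X and B are shifted into bit 3 so they can be
               ORed directly into the reg, index and base numbers. */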
            rex_w = (b >> 3) & 1;
            rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            REX_B(s) = (b & 0x1) << 3;
            x86_64_hregs = 1; /* select uniform byte register addressing */
            goto next_byte;
        }
        if (rex_w == 1) {
            /* 0x66 is ignored if rex.w is set */
            dflag = 2;
        } else {
            if (prefixes & PREFIX_DATA)
                dflag ^= 1;
        }
        if (!(prefixes & PREFIX_ADR))
            aflag = 2;
    } else
#endif
    {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        }
        if (prefixes & PREFIX_DATA)
            dflag ^= 1;
        if (prefixes & PREFIX_ADR)
            aflag ^= 1;
    }

    s->prefix = prefixes;
    s->aflag = aflag;
    s->dflag = dflag;

    /* lock generation */
    if (prefixes & PREFIX_LOCK)
        tcg_gen_helper_0_0(helper_lock);

    /* now check op code */
 reswitch:
    switch(b) {
    case 0x0f:
        /**************************/
        /* extended op code */
        b = ldub_code(s->pc++) | 0x100;
        goto reswitch;

        /**************************/
        /* arith & logic */
    case 0x00 ... 0x05:
    case 0x08 ... 0x0d:
    case 0x10 ... 0x15:
    case 0x18 ... 0x1d:
    case 0x20 ... 0x25:
    case 0x28 ... 0x2d:
    case 0x30 ... 0x35:
    case 0x38 ... 0x3d:
        {
            int op, f, val;
            op = (b >> 3) & 7;
            f = (b >> 1) & 3;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            switch(f) {
            case 0: /* OP Ev, Gv */
                modrm = ldub_code(s->pc++);
                reg = ((modrm >> 3) & 7) | rex_r;
                mod = (modrm >> 6) & 3;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    opreg = OR_TMP0;
                } else if (op == OP_XORL && rm == reg) {
                xor_zero:
                    /* xor reg, reg optimisation */
                    gen_op_movl_T0_0();
                    s->cc_op = CC_OP_LOGICB + ot;
                    gen_op_mov_reg_T0(ot, reg);
                    gen_op_update1_cc();
                    break;
                } else {
                    opreg = rm;
                }
                gen_op_mov_TN_reg(ot, 1, reg);
                gen_op(s, op, ot, opreg);
                break;
            case 1: /* OP Gv, Ev */
                modrm = ldub_code(s->pc++);
                mod = (modrm >> 6) & 3;
                reg = ((modrm >> 3) & 7) | rex_r;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_op_ld_T1_A0(ot + s->mem_index);
                } else if (op == OP_XORL && rm == reg) {
                    goto xor_zero;
                } else {
                    gen_op_mov_TN_reg(ot, 1, rm);
                }
                gen_op(s, op, ot, reg);
                break;
            case 2: /* OP A, Iv */
                val = insn_get(s, ot);
                gen_op_movl_T1_im(val);
                gen_op(s, op, ot, OR_EAX);
                break;
            }
        }
        break;

    case 0x82:
        if (CODE64(s))
            goto illegal_op;
    case 0x80: /* GRP1 */
    case 0x81:
    case 0x83:
        {
            int val;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            modrm = ldub_code(s->pc++);
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (b == 0x83)
                    s->rip_offset = 1;
                else
                    s->rip_offset = insn_const_size(ot);
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                opreg = OR_TMP0;
            } else {
                opreg = rm;
            }

            switch(b) {
            default:
            case 0x80:
            case 0x81:
            case 0x82:
                val = insn_get(s, ot);
                break;
            case 0x83:
                val = (int8_t)insn_get(s, OT_BYTE);
                break;
            }
            gen_op_movl_T1_im(val);
            gen_op(s, op, ot, opreg);
        }
        break;

        /**************************/
        /* inc, dec, and other misc arith */
    case 0x40 ... 0x47: /* inc Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), 1);
        break;
    case 0x48 ... 0x4f: /* dec Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), -1);
        break;
    case 0xf6: /* GRP3 */
    case 0xf7:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (mod != 3) {
            if (op == 0)
                s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

        switch(op) {
        case 0: /* test */
            val = insn_get(s, ot);
            gen_op_movl_T1_im(val);
            gen_op_testl_T0_T1_cc();
            s->cc_op = CC_OP_LOGICB + ot;
            break;
        case 2: /* not */
            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            break;
        case 3: /* neg */
            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            gen_op_update_neg_cc();
            s->cc_op = CC_OP_SUBB + ot;
            break;
        case 4: /* mul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
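                /* for MUL, CF and OF are set iff the high half of the
                   product is non-zero, so keep it in cc_src */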
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
#else
                {
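                    /* with a 32-bit target, cpu_T[] is only 32 bits
                       wide: do the 32x32->64 multiply in i64 temps
                       and split the result between EAX (low) and
                       EDX (high) */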
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
        case 5: /* imul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
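                /* cc_src = result - sign_extend(result): non-zero iff
                   the signed product overflowed the source width */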
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
#else
                {
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
        case 6: /* div */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divb_AL, cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divw_AX, cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divl_EAX, cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divq_EAX, cpu_T[0]);
                break;
#endif
            }
            break;
        case 7: /* idiv */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivb_AL, cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivw_AX, cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivl_EAX, cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivq_EAX, cpu_T[0]);
                break;
#endif
            }
            break;
        default:
            goto illegal_op;
        }
        break;

    case 0xfe: /* GRP4 */
    case 0xff: /* GRP5 */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (op >= 2 && b == 0xfe) {
            goto illegal_op;
        }
        if (CODE64(s)) {
            if (op == 2 || op == 4) {
                /* operand size for jumps is 64 bit */
                ot = OT_QUAD;
            } else if (op == 3 || op == 5) {
                /* for far calls, the operand is 16 or 32 bit, even
                   in long mode */
                ot = dflag ? OT_LONG : OT_WORD;
            } else if (op == 6) {
                /* default push size is 64 bit */
                ot = dflag ? OT_QUAD : OT_WORD;
            }
        }
        if (mod != 3) {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            if (op >= 2 && op != 3 && op != 5)
                gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

        switch(op) {
        case 0: /* inc Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, 1);
            break;
        case 1: /* dec Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, -1);
            break;
        case 2: /* call Ev */
            /* XXX: optimize if memory (no 'and' is necessary) */
            if (s->dflag == 0)
                gen_op_andl_T0_ffff();
            next_eip = s->pc - s->cs_base;
            gen_movtl_T1_im(next_eip);
            gen_push_T1(s);
            gen_op_jmp_T0();
            gen_eob(s);
            break;
        case 3: /* lcall Ev */
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        do_lcall:
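            /* far calls go through a helper: in protected mode the
               helper may raise an exception, so the condition codes
               and EIP must be brought up to date before it runs */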
            if (s->pe && !s->vm86) {
                if (s->cc_op != CC_OP_DYNAMIC)
                    gen_op_set_cc_op(s->cc_op);
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_4(helper_lcall_protected,
                                   cpu_tmp2_i32, cpu_T[1],
                                   tcg_const_i32(dflag),
                                   tcg_const_i32(s->pc - pc_start));
            } else {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_4(helper_lcall_real,
                                   cpu_tmp2_i32, cpu_T[1],
                                   tcg_const_i32(dflag),
                                   tcg_const_i32(s->pc - s->cs_base));
            }
            gen_eob(s);
            break;
        case 4: /* jmp Ev */
            if (s->dflag == 0)
                gen_op_andl_T0_ffff();
            gen_op_jmp_T0();
            gen_eob(s);
            break;
        case 5: /* ljmp Ev */
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        do_ljmp:
            if (s->pe && !s->vm86) {
                if (s->cc_op != CC_OP_DYNAMIC)
                    gen_op_set_cc_op(s->cc_op);
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_3(helper_ljmp_protected,
                                   cpu_tmp2_i32,
                                   cpu_T[1],
                                   tcg_const_i32(s->pc - pc_start));
            } else {
                gen_op_movl_seg_T0_vm(R_CS);
                gen_op_movl_T0_T1();
                gen_op_jmp_T0();
            }
            gen_eob(s);
            break;
        case 6: /* push Ev */
            gen_push_T0(s);
            break;
        default:
            goto illegal_op;
        }
        break;

    case 0x84: /* test Ev, Gv */
    case 0x85:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_TN_reg(ot, 1, reg);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        val = insn_get(s, ot);

        gen_op_mov_TN_reg(ot, 0, OR_EAX);
        gen_op_movl_T1_im(val);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

    case 0x98: /* CWDE/CBW */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_LONG, R_EAX);
        } else {
            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_WORD, R_EAX);
        }
        break;
    case 0x99: /* CDQ/CWD */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
            gen_op_mov_reg_T0(OT_LONG, R_EDX);
        } else {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
            gen_op_mov_reg_T0(OT_WORD, R_EDX);
        }
        break;
    case 0x1af: /* imul Gv, Ev */
    case 0x69: /* imul Gv, Ev, I */
    case 0x6b:
        ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (b == 0x69)
            s->rip_offset = insn_const_size(ot);
        else if (b == 0x6b)
            s->rip_offset = 1;
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
        if (b == 0x69) {
            val = insn_get(s, ot);
            gen_op_movl_T1_im(val);
        } else if (b == 0x6b) {
            val = (int8_t)insn_get(s, OT_BYTE);
            gen_op_movl_T1_im(val);
        } else {
            gen_op_mov_TN_reg(ot, 1, reg);
        }

#ifdef TARGET_X86_64
        if (ot == OT_QUAD) {
            tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
        } else
#endif
        if (ot == OT_LONG) {
#ifdef TARGET_X86_64
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
#else
                {
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
                }
#endif
        } else {
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
        }
        gen_op_mov_reg_T0(ot, reg);
        s->cc_op = CC_OP_MULB + ot;
        break;
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_TN_reg(ot, 1, rm);
            gen_op_addl_T0_T1();
            gen_op_mov_reg_T1(ot, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_op_addl_T0_T1();
            gen_op_st_T0_A0(ot + s->mem_index);
            gen_op_mov_reg_T1(ot, reg);
        }
        gen_op_update2_cc();
        s->cc_op = CC_OP_ADDB + ot;
        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        {
            int label1, label2;
            TCGv t0, t1, t2, a0;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            t0 = tcg_temp_local_new(TCG_TYPE_TL);
            t1 = tcg_temp_local_new(TCG_TYPE_TL);
            t2 = tcg_temp_local_new(TCG_TYPE_TL);
            a0 = tcg_temp_local_new(TCG_TYPE_TL);
            gen_op_mov_v_reg(ot, t1, reg);
            if (mod == 3) {
                rm = (modrm & 7) | REX_B(s);
                gen_op_mov_v_reg(ot, t0, rm);
            } else {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_mov_tl(a0, cpu_A0);
                gen_op_ld_v(ot + s->mem_index, t0, a0);
                rm = 0; /* avoid warning */
            }
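            /* compare EAX with the destination: if equal, the new
               value (t1) is written, otherwise EAX receives the old
               value; the memory form stores on both paths so a write
               always happens */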
4393
            label1 = gen_new_label();
4394
            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
4395
            tcg_gen_sub_tl(t2, t2, t0);
4396
            gen_extu(ot, t2);
4397
            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
4398
            if (mod == 3) {
4399
                label2 = gen_new_label();
4400
                gen_op_mov_reg_v(ot, R_EAX, t0);
4401
                tcg_gen_br(label2);
4402
                gen_set_label(label1);
4403
                gen_op_mov_reg_v(ot, rm, t1);
4404
                gen_set_label(label2);
4405
            } else {
4406
                tcg_gen_mov_tl(t1, t0);
4407
                gen_op_mov_reg_v(ot, R_EAX, t0);
4408
                gen_set_label(label1);
4409
                /* always store */
4410
                gen_op_st_v(ot + s->mem_index, t1, a0);
4411
            }
4412
            tcg_gen_mov_tl(cpu_cc_src, t0);
4413
            tcg_gen_mov_tl(cpu_cc_dst, t2);
4414
            s->cc_op = CC_OP_SUBB + ot;
4415
            tcg_temp_free(t0);
4416
            tcg_temp_free(t1);
4417
            tcg_temp_free(t2);
4418
            tcg_temp_free(a0);
4419
        }
4420
        break;
4421
    case 0x1c7: /* cmpxchg8b */
4422
        modrm = ldub_code(s->pc++);
4423
        mod = (modrm >> 6) & 3;
4424
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
4425
            goto illegal_op;
4426
#ifdef TARGET_X86_64
4427
        if (dflag == 2) {
4428
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
4429
                goto illegal_op;
4430
            gen_jmp_im(pc_start - s->cs_base);
4431
            if (s->cc_op != CC_OP_DYNAMIC)
4432
                gen_op_set_cc_op(s->cc_op);
4433
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4434
            tcg_gen_helper_0_1(helper_cmpxchg16b, cpu_A0);
4435
        } else
4436
#endif        
4437
        {
4438
            if (!(s->cpuid_features & CPUID_CX8))
4439
                goto illegal_op;
4440
            gen_jmp_im(pc_start - s->cs_base);
4441
            if (s->cc_op != CC_OP_DYNAMIC)
4442
                gen_op_set_cc_op(s->cc_op);
4443
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4444
            tcg_gen_helper_0_1(helper_cmpxchg8b, cpu_A0);
4445
        }
4446
        s->cc_op = CC_OP_EFLAGS;
4447
        break;
4448

    
4449
        /**************************/
4450
        /* push/pop */
4451
    case 0x50 ... 0x57: /* push */
4452
        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
4453
        gen_push_T0(s);
4454
        break;
4455
    case 0x58 ... 0x5f: /* pop */
4456
        if (CODE64(s)) {
4457
            ot = dflag ? OT_QUAD : OT_WORD;
4458
        } else {
4459
            ot = dflag + OT_WORD;
4460
        }
4461
        gen_pop_T0(s);
4462
        /* NOTE: order is important for pop %sp */
4463
        gen_pop_update(s);
4464
        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
4465
        break;
4466
    case 0x60: /* pusha */
4467
        if (CODE64(s))
4468
            goto illegal_op;
4469
        gen_pusha(s);
4470
        break;
4471
    case 0x61: /* popa */
4472
        if (CODE64(s))
4473
            goto illegal_op;
4474
        gen_popa(s);
4475
        break;
4476
    case 0x68: /* push Iv */
4477
    case 0x6a:
4478
        if (CODE64(s)) {
4479
            ot = dflag ? OT_QUAD : OT_WORD;
4480
        } else {
4481
            ot = dflag + OT_WORD;
4482
        }
4483
        if (b == 0x68)
4484
            val = insn_get(s, ot);
4485
        else
4486
            val = (int8_t)insn_get(s, OT_BYTE);
4487
        gen_op_movl_T0_im(val);
4488
        gen_push_T0(s);
4489
        break;
4490
    case 0x8f: /* pop Ev */
4491
        if (CODE64(s)) {
4492
            ot = dflag ? OT_QUAD : OT_WORD;
4493
        } else {
4494
            ot = dflag + OT_WORD;
4495
        }
4496
        modrm = ldub_code(s->pc++);
4497
        mod = (modrm >> 6) & 3;
4498
        gen_pop_T0(s);
4499
        if (mod == 3) {
4500
            /* NOTE: order is important for pop %sp */
4501
            gen_pop_update(s);
4502
            rm = (modrm & 7) | REX_B(s);
4503
            gen_op_mov_reg_T0(ot, rm);
4504
        } else {
4505
            /* NOTE: order is important too for MMU exceptions */
4506
            s->popl_esp_hack = 1 << ot;
4507
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
4508
            s->popl_esp_hack = 0;
4509
            gen_pop_update(s);
4510
        }
4511
        break;
4512
    case 0xc8: /* enter */
4513
        {
4514
            int level;
4515
            val = lduw_code(s->pc);
4516
            s->pc += 2;
4517
            level = ldub_code(s->pc++);
4518
            gen_enter(s, val, level);
4519
        }
4520
        break;
4521
    case 0xc9: /* leave */
4522
        /* XXX: exception not precise (ESP is updated before potential exception) */
4523
        if (CODE64(s)) {
4524
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
4525
            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
4526
        } else if (s->ss32) {
4527
            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
4528
            gen_op_mov_reg_T0(OT_LONG, R_ESP);
4529
        } else {
4530
            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
4531
            gen_op_mov_reg_T0(OT_WORD, R_ESP);
4532
        }
4533
        gen_pop_T0(s);
4534
        if (CODE64(s)) {
4535
            ot = dflag ? OT_QUAD : OT_WORD;
4536
        } else {
4537
            ot = dflag + OT_WORD;
4538
        }
4539
        gen_op_mov_reg_T0(ot, R_EBP);
4540
        gen_pop_update(s);
4541
        break;
4542
    case 0x06: /* push es */
4543
    case 0x0e: /* push cs */
4544
    case 0x16: /* push ss */
4545
    case 0x1e: /* push ds */
4546
        if (CODE64(s))
4547
            goto illegal_op;
4548
        gen_op_movl_T0_seg(b >> 3);
4549
        gen_push_T0(s);
4550
        break;
4551
    case 0x1a0: /* push fs */
4552
    case 0x1a8: /* push gs */
4553
        gen_op_movl_T0_seg((b >> 3) & 7);
4554
        gen_push_T0(s);
4555
        break;
4556
    case 0x07: /* pop es */
4557
    case 0x17: /* pop ss */
4558
    case 0x1f: /* pop ds */
4559
        if (CODE64(s))
4560
            goto illegal_op;
4561
        reg = b >> 3;
4562
        gen_pop_T0(s);
4563
        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
4564
        gen_pop_update(s);
4565
        if (reg == R_SS) {
4566
            /* if reg == SS, inhibit interrupts/trace. */
4567
            /* If several instructions disable interrupts, only the
4568
               _first_ does it */
4569
            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
4570
                tcg_gen_helper_0_0(helper_set_inhibit_irq);
4571
            s->tf = 0;
4572
        }
4573
        if (s->is_jmp) {
4574
            gen_jmp_im(s->pc - s->cs_base);
4575
            gen_eob(s);
4576
        }
4577
        break;
4578
    case 0x1a1: /* pop fs */
4579
    case 0x1a9: /* pop gs */
4580
        gen_pop_T0(s);
4581
        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
4582
        gen_pop_update(s);
4583
        if (s->is_jmp) {
4584
            gen_jmp_im(s->pc - s->cs_base);
4585
            gen_eob(s);
4586
        }
4587
        break;
4588

    
4589
        /**************************/
4590
        /* mov */
4591
    case 0x88:
4592
    case 0x89: /* mov Gv, Ev */
4593
        if ((b & 1) == 0)
4594
            ot = OT_BYTE;
4595
        else
4596
            ot = dflag + OT_WORD;
4597
        modrm = ldub_code(s->pc++);
4598
        reg = ((modrm >> 3) & 7) | rex_r;
4599

    
4600
        /* generate a generic store */
4601
        gen_ldst_modrm(s, modrm, ot, reg, 1);
4602
        break;
4603
    case 0xc6:
4604
    case 0xc7: /* mov Ev, Iv */
4605
        if ((b & 1) == 0)
4606
            ot = OT_BYTE;
4607
        else
4608
            ot = dflag + OT_WORD;
4609
        modrm = ldub_code(s->pc++);
4610
        mod = (modrm >> 6) & 3;
4611
        if (mod != 3) {
4612
            s->rip_offset = insn_const_size(ot);
4613
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
4614
        }
4615
        val = insn_get(s, ot);
4616
        gen_op_movl_T0_im(val);
4617
        if (mod != 3)
4618
            gen_op_st_T0_A0(ot + s->mem_index);
4619
        else
4620
            gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
4621
        break;
4622
    case 0x8a:
4623
    case 0x8b: /* mov Ev, Gv */
4624
        if ((b & 1) == 0)
4625
            ot = OT_BYTE;
4626
        else
4627
            ot = OT_WORD + dflag;
4628
        modrm = ldub_code(s->pc++);
4629
        reg = ((modrm >> 3) & 7) | rex_r;
4630

    
4631
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
4632
        gen_op_mov_reg_T0(ot, reg);
4633
        break;
4634
    case 0x8e: /* mov seg, Gv */
4635
        modrm = ldub_code(s->pc++);
4636
        reg = (modrm >> 3) & 7;
4637
        if (reg >= 6 || reg == R_CS)
4638
            goto illegal_op;
4639
        gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
4640
        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
4641
        if (reg == R_SS) {
4642
            /* if reg == SS, inhibit interrupts/trace */
4643
            /* If several instructions disable interrupts, only the
4644
               _first_ does it */
4645
            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
4646
                tcg_gen_helper_0_0(helper_set_inhibit_irq);
4647
            s->tf = 0;
4648
        }
4649
        if (s->is_jmp) {
4650
            gen_jmp_im(s->pc - s->cs_base);
4651
            gen_eob(s);
4652
        }
4653
        break;
4654
    case 0x8c: /* mov Gv, seg */
4655
        modrm = ldub_code(s->pc++);
4656
        reg = (modrm >> 3) & 7;
4657
        mod = (modrm >> 6) & 3;
4658
        if (reg >= 6)
4659
            goto illegal_op;
4660
        gen_op_movl_T0_seg(reg);
4661
        if (mod == 3)
4662
            ot = OT_WORD + dflag;
4663
        else
4664
            ot = OT_WORD;
4665
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
4666
        break;
4667

    
4668
    case 0x1b6: /* movzbS Gv, Eb */
    case 0x1b7: /* movzwS Gv, Ew */
    case 0x1be: /* movsbS Gv, Eb */
    case 0x1bf: /* movswS Gv, Ew */
        {
            int d_ot;
            /* d_ot is the size of the destination */
            d_ot = dflag + OT_WORD;
            /* ot is the size of the source */
            ot = (b & 1) + OT_BYTE;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                gen_op_mov_TN_reg(ot, 0, rm);
                switch(ot | (b & 8)) {
                case OT_BYTE:
                    tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                    break;
                case OT_BYTE | 8:
                    tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                    break;
                case OT_WORD:
                    tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                    break;
                default:
                case OT_WORD | 8:
                    tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                    break;
                }
                gen_op_mov_reg_T0(d_ot, reg);
            } else {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                if (b & 8) {
                    gen_op_lds_T0_A0(ot + s->mem_index);
                } else {
                    gen_op_ldu_T0_A0(ot + s->mem_index);
                }
                gen_op_mov_reg_T0(d_ot, reg);
            }
        }
        break;

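    /* lea computes only the effective address, so the segment base
       (addseg) and any override prefix are suppressed around
       gen_lea_modrm() */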
    case 0x8d: /* lea */
        ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        reg = ((modrm >> 3) & 7) | rex_r;
        /* we must ensure that no segment is added */
        s->override = -1;
        val = s->addseg;
        s->addseg = 0;
        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
        s->addseg = val;
        gen_op_mov_reg_A0(ot - OT_WORD, reg);
        break;

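    /* moffs forms: no ModRM byte; the operand is a direct offset whose
       width is the current address size (a full 8 byte immediate when
       aflag == 2) */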
    case 0xa0: /* mov EAX, Ov */
    case 0xa1:
    case 0xa2: /* mov Ov, EAX */
    case 0xa3:
        {
            target_ulong offset_addr;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                offset_addr = ldq_code(s->pc);
                s->pc += 8;
                gen_op_movq_A0_im(offset_addr);
            } else
#endif
            {
                if (s->aflag) {
                    offset_addr = insn_get(s, OT_LONG);
                } else {
                    offset_addr = insn_get(s, OT_WORD);
                }
                gen_op_movl_A0_im(offset_addr);
            }
            gen_add_A0_ds_seg(s);
            if ((b & 2) == 0) {
                gen_op_ld_T0_A0(ot + s->mem_index);
                gen_op_mov_reg_T0(ot, R_EAX);
            } else {
                gen_op_mov_TN_reg(ot, 0, R_EAX);
                gen_op_st_T0_A0(ot + s->mem_index);
            }
        }
        break;
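    /* xlat: AL = [(E/R)BX + unsigned AL], addressed through DS unless a
       segment override prefix is present */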
    case 0xd7: /* xlat */
#ifdef TARGET_X86_64
        if (s->aflag == 2) {
            gen_op_movq_A0_reg(R_EBX);
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
        } else
#endif
        {
            gen_op_movl_A0_reg(R_EBX);
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
            if (s->aflag == 0)
                gen_op_andl_A0_ffff();
            else
                tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
        }
        gen_add_A0_ds_seg(s);
        gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
        break;
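    /* short form moves: the low 3 opcode bits select the destination
       register, extended to 4 bits by REX.B in 64 bit mode */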
    case 0xb0 ... 0xb7: /* mov R, Ib */
        val = insn_get(s, OT_BYTE);
        gen_op_movl_T0_im(val);
        gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
        break;
    case 0xb8 ... 0xbf: /* mov R, Iv */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            uint64_t tmp;
            /* 64 bit case */
            tmp = ldq_code(s->pc);
            s->pc += 8;
            reg = (b & 7) | REX_B(s);
            gen_movtl_T0_im(tmp);
            gen_op_mov_reg_T0(OT_QUAD, reg);
        } else
#endif
        {
            ot = dflag ? OT_LONG : OT_WORD;
            val = insn_get(s, ot);
            reg = (b & 7) | REX_B(s);
            gen_op_movl_T0_im(val);
            gen_op_mov_reg_T0(ot, reg);
        }
        break;

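    /* xchg with a memory operand locks the bus even without an explicit
       LOCK prefix; 0x90 (xchg EAX, EAX) is the nop encoding, hence the
       register form starts at 0x91 */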
    case 0x91 ... 0x97: /* xchg R, EAX */
        ot = dflag + OT_WORD;
        reg = (b & 7) | REX_B(s);
        rm = R_EAX;
        goto do_xchg_reg;
    case 0x86:
    case 0x87: /* xchg Ev, Gv */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
        do_xchg_reg:
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_TN_reg(ot, 1, rm);
            gen_op_mov_reg_T0(ot, rm);
            gen_op_mov_reg_T1(ot, reg);
        } else {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_mov_TN_reg(ot, 0, reg);
            /* for xchg, lock is implicit */
            if (!(prefixes & PREFIX_LOCK))
                tcg_gen_helper_0_0(helper_lock);
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_op_st_T0_A0(ot + s->mem_index);
            if (!(prefixes & PREFIX_LOCK))
                tcg_gen_helper_0_0(helper_unlock);
            gen_op_mov_reg_T1(ot, reg);
        }
        break;
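    /* lxx: load a full far pointer (offset plus segment selector) from
       memory into a general register and a segment register */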
    case 0xc4: /* les Gv */
        if (CODE64(s))
            goto illegal_op;
        op = R_ES;
        goto do_lxx;
    case 0xc5: /* lds Gv */
        if (CODE64(s))
            goto illegal_op;
        op = R_DS;
        goto do_lxx;
    case 0x1b2: /* lss Gv */
        op = R_SS;
        goto do_lxx;
    case 0x1b4: /* lfs Gv */
        op = R_FS;
        goto do_lxx;
    case 0x1b5: /* lgs Gv */
        op = R_GS;
    do_lxx:
        ot = dflag ? OT_LONG : OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
        gen_op_ld_T1_A0(ot + s->mem_index);
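        /* skip past the offset (2 or 4 bytes depending on ot) to reach
           the segment selector word */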
        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
        /* load the segment first to handle exceptions properly */
        gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
        /* then put the data */
        gen_op_mov_reg_T1(ot, reg);
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;

        /************************/
        /* shifts */
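    /* grp2 dispatch: shift == 0 takes the count from CL, shift == 1 is
       a constant count of 1, shift == 2 reads an immediate count byte
       after the ModRM/displacement bytes */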
    case 0xc0:
    case 0xc1:
        /* shift Ev,Ib */
        shift = 2;
    grp2:
        {
            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            modrm = ldub_code(s->pc++);
            mod = (modrm >> 6) & 3;
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (shift == 2) {
                    s->rip_offset = 1;
                }
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                opreg = OR_TMP0;
            } else {
                opreg = (modrm & 7) | REX_B(s);
            }

            /* simpler op */
            if (shift == 0) {
                gen_shift(s, op, ot, opreg, OR_ECX);
            } else {
                if (shift == 2) {
                    shift = ldub_code(s->pc++);
                }
                gen_shifti(s, op, ot, opreg, shift);
            }
        }
        break;
    case 0xd0:
    case 0xd1:
        /* shift Ev,1 */
        shift = 1;
        goto grp2;
    case 0xd2:
    case 0xd3:
        /* shift Ev,cl */
        shift = 0;
        goto grp2;

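    /* shld/shrd double precision shifts: op selects the direction
       (0 = shld, 1 = shrd), shift selects an immediate count (1) or a
       count in CL (0) */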
    case 0x1a4: /* shld imm */
        op = 0;
        shift = 1;
        goto do_shiftd;
    case 0x1a5: /* shld cl */
        op = 0;
        shift = 0;
        goto do_shiftd;
    case 0x1ac: /* shrd imm */
        op = 1;
        shift = 1;
        goto do_shiftd;
    case 0x1ad: /* shrd cl */
        op = 1;
        shift = 0;
    do_shiftd:
        ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (mod != 3) {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            opreg = OR_TMP0;
        } else {
            opreg = rm;
        }
        gen_op_mov_TN_reg(ot, 1, reg);

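        /* the shift count lands in cpu_T3: either the immediate count
           byte or the current value of ECX (CL) */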
        if (shift) {
            val = ldub_code(s->pc++);
            tcg_gen_movi_tl(cpu_T3, val);
        } else {
            tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));