/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>
#include <assert.h>

#include "cpu.h"
#include "exec-all.h"
#include "disas.h"
#include "helper.h"
#include "tcg-op.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10

#ifdef TARGET_X86_64
#define X86_64_ONLY(x) x
#define X86_64_DEF(x...) x
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
#if 1
#define BUGGY_64(x) NULL
#endif
#else
#define X86_64_ONLY(x) NULL
#define X86_64_DEF(x...)
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif
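
/* X86_64_ONLY() keeps its argument only in 64-bit capable builds and
   yields NULL otherwise, so it can blank out individual table entries
   (see the helper_rotc[] rclq/rcrq slots below); X86_64_DEF() likewise
   drops 64-bit-only expression fragments from 32-bit builds. */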

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_env, cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
/* local temps */
static TCGv cpu_T[2], cpu_T3;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1;
static TCGv cpu_tmp5, cpu_tmp6;

#include "gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    int aflag, dflag;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int ss32;   /* 32 bit stack segment */
    int cc_op;  /* current CC operation */
    int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
} DisasContext;
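
/* Lazy condition-code evaluation: instead of computing EFLAGS after
   every instruction, the translator records the last flag-setting
   operation in cc_op and its operands in cpu_cc_dst/cpu_cc_src; the
   flags are only materialized (via the cc_table helpers used below)
   when a consumer such as a conditional jump needs them.
   CC_OP_DYNAMIC means cc_op itself is only known at run time, in the
   cpu_cc_op global. */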

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

/* operand size */
enum {
    OT_BYTE = 0,
    OT_WORD,
    OT_LONG,
    OT_QUAD,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

static inline void gen_op_movl_T0_0(void)
{
    tcg_gen_movi_tl(cpu_T[0], 0);
}

static inline void gen_op_movl_T0_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T0_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_op_movl_T1_im(int32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_T1_imu(uint32_t val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_movl_A0_im(uint32_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_im(int64_t val)
{
    tcg_gen_movi_tl(cpu_A0, val);
}
#endif

static inline void gen_movtl_T0_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[0], val);
}

static inline void gen_movtl_T1_im(target_ulong val)
{
    tcg_gen_movi_tl(cpu_T[1], val);
}

static inline void gen_op_andl_T0_ffff(void)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
}

static inline void gen_op_andl_T0_im(uint32_t val)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
}

static inline void gen_op_movl_T0_T1(void)
{
    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
}

static inline void gen_op_andl_A0_ffff(void)
{
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif
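
/* These offsets locate the 8/16/32-bit views of a register inside the
   host-endian target_ulong that holds it: e.g. on a little-endian host
   AL is byte 0 of regs[R_EAX] and AH (REG_H_OFFSET) is byte 1, while
   on a big-endian host the low byte sits at the opposite end.  The
   REG_LH_OFFSET slot (the high 32 bits) only makes sense when
   target_ulong is 64 bits wide. */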

static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
        } else {
            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    case OT_WORD:
        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case OT_QUAD:
        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case OT_LONG:
        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}
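
/* For OT_BYTE, register numbers 4..7 without a REX prefix name the
   legacy high-byte registers AH/CH/DH/BH, i.e. byte REG_H_OFFSET of
   regs[reg - 4]; with a REX prefix present (x86_64_hregs set) they
   name SPL/BPL/SIL/DIL instead.  The zero store into REG_LH_OFFSET for
   OT_LONG implements the x86-64 rule that a 32-bit write clears the
   high half of the 64-bit register. */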

static inline void gen_op_mov_reg_T0(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
}

static inline void gen_op_mov_reg_T1(int ot, int reg)
{
    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
}

static inline void gen_op_mov_reg_A0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
#ifdef TARGET_X86_64
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        /* high part of register set to zero */
        tcg_gen_movi_tl(cpu_tmp0, 0);
        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
        break;
    default:
    case 2:
        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#else
    default:
    case 1:
        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
        break;
#endif
    }
}

static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
{
    switch(ot) {
    case OT_BYTE:
        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
            goto std_case;
        } else {
            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
        }
        break;
    default:
    std_case:
        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
    }
}

static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
{
    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
}

static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif

static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}

static inline void gen_op_addl_T0_T1(void)
{
    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}

static inline void gen_op_jmp_T0(void)
{
    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
}

static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_add_reg_T0(int size, int reg)
{
    switch(size) {
    case 0:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
        break;
    case 1:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
#ifdef TARGET_X86_64
        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
#endif
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
        tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
        break;
#endif
    }
}

static inline void gen_op_set_cc_op(int32_t val)
{
    tcg_gen_movi_i32(cpu_cc_op, val);
}

static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
}

static inline void gen_op_addq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}

static inline void gen_op_movq_A0_reg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
}

static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
#endif

static inline void gen_op_lds_T0_A0(int idx)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
        break;
    default:
    case 2:
        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
        break;
    }
}

static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_ld8u(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_ld16u(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_ld32u(t0, a0, mem_index);
        break;
    default:
    case 3:
        tcg_gen_qemu_ld64(t0, a0, mem_index);
        break;
    }
}
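
/* The 'idx' argument packs the operand size (the OT_* code) into its
   low two bits and the DisasContext mem_index, biased by one, into the
   bits above; callers pass e.g. "ot + s->mem_index" where mem_index is
   a multiple of 4.  So a call such as
   gen_op_ld_T0_A0(OT_LONG + s->mem_index) decodes back into a 32-bit
   load through the memory access functions selected by mem_index. */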

/* XXX: always use ldu or lds */
static inline void gen_op_ld_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ldu_T0_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_ld_T1_A0(int idx)
{
    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
    int mem_index = (idx >> 2) - 1;
    switch(idx & 3) {
    case 0:
        tcg_gen_qemu_st8(t0, a0, mem_index);
        break;
    case 1:
        tcg_gen_qemu_st16(t0, a0, mem_index);
        break;
    case 2:
        tcg_gen_qemu_st32(t0, a0, mem_index);
        break;
    default:
    case 3:
        tcg_gen_qemu_st64(t0, a0, mem_index);
        break;
    }
}

static inline void gen_op_st_T0_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[0], cpu_A0);
}

static inline void gen_op_st_T1_A0(int idx)
{
    gen_op_st_v(idx, cpu_T[1], cpu_A0);
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
    } else
#endif
    if (s->aflag) {
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
    } else {
        /* 16 bit address, always add the segment base */
        if (override < 0)
            override = R_DS;
        gen_op_movl_A0_reg(R_ESI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(override);
    }
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (s->aflag == 2) {
        gen_op_movq_A0_reg(R_EDI);
    } else
#endif
    if (s->aflag) {
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
    } else {
        gen_op_movl_A0_reg(R_EDI);
        gen_op_andl_A0_ffff();
        gen_op_addl_A0_seg(R_ES);
    }
}

static inline void gen_op_movl_T0_Dshift(int ot)
{
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
}
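
/* env->df holds the direction flag as +1 or -1, so shifting it left by
   the operand-size code yields the per-iteration increment (plus or
   minus 1, 2, 4 or 8 bytes) that the string ops below add to
   ESI/EDI. */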

static void gen_extu(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8u_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16u_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32u_tl(reg, reg);
        break;
    default:
        break;
    }
}

static void gen_exts(int ot, TCGv reg)
{
    switch(ot) {
    case OT_BYTE:
        tcg_gen_ext8s_tl(reg, reg);
        break;
    case OT_WORD:
        tcg_gen_ext16s_tl(reg, reg);
        break;
    case OT_LONG:
        tcg_gen_ext32s_tl(reg, reg);
        break;
    default:
        break;
    }
}

static inline void gen_op_jnz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(int size, int label1)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
    gen_extu(size + 1, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void *helper_in_func[3] = {
    helper_inb,
    helper_inw,
    helper_inl,
};

static void *helper_out_func[3] = {
    helper_outb,
    helper_outw,
    helper_outl,
};

static void *gen_check_io_func[3] = {
    helper_check_iob,
    helper_check_iow,
    helper_check_iol,
};

static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    int state_saved;
    target_ulong next_eip;

    state_saved = 0;
    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        state_saved = 1;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_1(gen_check_io_func[ot],
                           cpu_tmp2_i32);
    }
    if(s->flags & HF_SVMI_MASK) {
        if (!state_saved) {
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_jmp_im(cur_eip);
            state_saved = 1;
        }
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_3(helper_svm_check_io,
                           cpu_tmp2_i32,
                           tcg_const_i32(svm_flags),
                           tcg_const_i32(next_eip - cur_eip));
    }
}
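
/* Both helper calls above can leave the translated code (a failed I/O
   permission check raises an exception, an intercepted I/O causes an
   SVM vmexit), so cc_op and EIP are flushed to the CPU state first;
   'state_saved' merely avoids flushing twice when both checks are
   generated. */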

static inline void gen_movs(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
}

static void gen_op_update1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static inline void gen_op_cmpl_T0_T1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_discard_tl(cpu_cc_src);
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

/* compute eflags.C to reg */
static void gen_compute_eflags_c(TCGv reg)
{
#if TCG_TARGET_REG_BITS == 32
    tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3);
    tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32,
                     (long)cc_table + offsetof(CCTable, compute_c));
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#else
    tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op);
    tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4);
    tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64,
                     (long)cc_table + offsetof(CCTable, compute_c));
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#endif
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}

/* compute all eflags into 'reg' */
static void gen_compute_eflags(TCGv reg)
{
#if TCG_TARGET_REG_BITS == 32
    tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3);
    tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32,
                     (long)cc_table + offsetof(CCTable, compute_all));
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#else
    tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op);
    tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4);
    tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64,
                     (long)cc_table + offsetof(CCTable, compute_all));
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0);
    tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE,
                 1, &cpu_tmp2_i32, 0, NULL);
#endif
    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}
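
/* Both functions above index cc_table[] with the run-time cpu_cc_op
   value and call the selected compute_all/compute_c helper indirectly.
   The shift amount is log2(sizeof(CCTable)): the struct holds two
   function pointers, hence 8 bytes (shift by 3) on 32-bit hosts and
   16 bytes (shift by 4) on 64-bit hosts. */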

static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    switch(jcc_op) {
    case JCC_O:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_B:
        gen_compute_eflags_c(cpu_T[0]);
        break;
    case JCC_Z:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_BE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_S:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_P:
        gen_compute_eflags(cpu_T[0]);
        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    case JCC_L:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    default:
    case JCC_LE:
        gen_compute_eflags(cpu_tmp0);
        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
        break;
    }
}
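
/* The shift counts above are the EFLAGS bit positions: CF=0, PF=2,
   ZF=6, SF=7, OF=11.  E.g. JCC_LE is computed straight from its
   definition, (SF xor OF) or ZF, by shifting each flag down to bit 0
   before combining. */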

/* return true if setcc_slow is not needed (WARNING: must be kept in
   sync with gen_jcc1) */
static int is_fast_jcc_case(DisasContext *s, int b)
{
    int jcc_op;
    jcc_op = (b >> 1) & 7;
    switch(s->cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:
        if (jcc_op == JCC_O || jcc_op == JCC_P)
            goto slow_jcc;
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:
        if (jcc_op != JCC_Z && jcc_op != JCC_S)
            goto slow_jcc;
        break;
    default:
    slow_jcc:
        return 0;
    }
    return 1;
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
{
    int inv, jcc_op, size, cond;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch(cc_op) {
        /* we optimize the cmp/jcc case */
    case CC_OP_SUBB:
    case CC_OP_SUBW:
    case CC_OP_SUBL:
    case CC_OP_SUBQ:

        size = cc_op - CC_OP_SUBB;
        switch(jcc_op) {
        case JCC_Z:
        fast_jcc_z:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
                t0 = cpu_tmp0;
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
                t0 = cpu_tmp0;
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
                t0 = cpu_tmp0;
                break;
#endif
            default:
                t0 = cpu_cc_dst;
                break;
            }
            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
            break;
        case JCC_S:
        fast_jcc_s:
            switch(size) {
            case 0:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
            case 1:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#ifdef TARGET_X86_64
            case 2:
                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                   0, l1);
                break;
#endif
            default:
                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
                                   0, l1);
                break;
            }
            break;

        case JCC_B:
            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
            goto fast_jcc_b;
        case JCC_BE:
            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
        fast_jcc_b:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        case JCC_L:
            cond = inv ? TCG_COND_GE : TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = inv ? TCG_COND_GT : TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
            switch(size) {
            case 0:
                t0 = cpu_tmp0;
                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext8s_tl(t0, cpu_cc_src);
                break;
            case 1:
                t0 = cpu_tmp0;
                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext16s_tl(t0, cpu_cc_src);
                break;
#ifdef TARGET_X86_64
            case 2:
                t0 = cpu_tmp0;
                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
                tcg_gen_ext32s_tl(t0, cpu_cc_src);
                break;
#endif
            default:
                t0 = cpu_cc_src;
                break;
            }
            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
            break;

        default:
            goto slow_jcc;
        }
        break;

        /* some jumps are easy to compute */
    case CC_OP_ADDB:
    case CC_OP_ADDW:
    case CC_OP_ADDL:
    case CC_OP_ADDQ:

    case CC_OP_ADCB:
    case CC_OP_ADCW:
    case CC_OP_ADCL:
    case CC_OP_ADCQ:

    case CC_OP_SBBB:
    case CC_OP_SBBW:
    case CC_OP_SBBL:
    case CC_OP_SBBQ:

    case CC_OP_LOGICB:
    case CC_OP_LOGICW:
    case CC_OP_LOGICL:
    case CC_OP_LOGICQ:

    case CC_OP_INCB:
    case CC_OP_INCW:
    case CC_OP_INCL:
    case CC_OP_INCQ:

    case CC_OP_DECB:
    case CC_OP_DECW:
    case CC_OP_DECL:
    case CC_OP_DECQ:

    case CC_OP_SHLB:
    case CC_OP_SHLW:
    case CC_OP_SHLL:
    case CC_OP_SHLQ:

    case CC_OP_SARB:
    case CC_OP_SARW:
    case CC_OP_SARL:
    case CC_OP_SARQ:
        switch(jcc_op) {
        case JCC_Z:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_z;
        case JCC_S:
            size = (cc_op - CC_OP_ADDB) & 3;
            goto fast_jcc_s;
        default:
            goto slow_jcc;
        }
        break;
    default:
    slow_jcc:
        gen_setcc_slow_T0(s, jcc_op);
        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
                           cpu_T[0], 0, l1);
        break;
    }
}
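
/* The fast SUB paths work because after a subtraction cc_dst holds the
   result and cc_src the subtrahend, so the first operand can be
   recovered as cc_dst + cc_src (cpu_tmp4 above); comparing it against
   cc_src with the matching signed/unsigned TCG condition reproduces
   the JB/JBE/JL/JLE outcome without materializing EFLAGS. */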

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    int l1, l2;

    l1 = gen_new_label();
    l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_op_mov_reg_T0(ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, int ot)
{
    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, int ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);
    gen_string_movl_A0_EDI(s);
    gen_op_ld_T1_A0(ot + s->mem_index);
    gen_op_cmpl_T0_T1_cc();
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_ins(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    gen_op_movl_T0_0();
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[0], cpu_tmp2_i32);
    gen_op_st_T0_A0(ot + s->mem_index);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    if (use_icount)
        gen_io_end();
}

static inline void gen_outs(DisasContext *s, int ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_ESI(s);
    gen_op_ld_T0_A0(ot + s->mem_index);

    gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);

    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    if (use_icount)
        gen_io_end();
}

/* same method as Valgrind: we generate jumps to the current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int l2;\
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

    
1289
static void *helper_fp_arith_ST0_FT0[8] = {
1290
    helper_fadd_ST0_FT0,
1291
    helper_fmul_ST0_FT0,
1292
    helper_fcom_ST0_FT0,
1293
    helper_fcom_ST0_FT0,
1294
    helper_fsub_ST0_FT0,
1295
    helper_fsubr_ST0_FT0,
1296
    helper_fdiv_ST0_FT0,
1297
    helper_fdivr_ST0_FT0,
1298
};
1299

    
1300
/* NOTE the exception in "r" op ordering */
1301
static void *helper_fp_arith_STN_ST0[8] = {
1302
    helper_fadd_STN_ST0,
1303
    helper_fmul_STN_ST0,
1304
    NULL,
1305
    NULL,
1306
    helper_fsubr_STN_ST0,
1307
    helper_fsub_STN_ST0,
1308
    helper_fdivr_STN_ST0,
1309
    helper_fdiv_STN_ST0,
1310
};

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, int ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_TN_reg(ot, 0, d);
    } else {
        gen_op_ld_T0_A0(ot + s1->mem_index);
    }
    switch(op) {
    case OP_ADCL:
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_SBBL:
        if (s1->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s1->cc_op);
        gen_compute_eflags_c(cpu_tmp4);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
        s1->cc_op = CC_OP_DYNAMIC;
        break;
    case OP_ADDL:
        gen_op_addl_T0_T1();
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();
        s1->cc_op = CC_OP_ADDB + ot;
        break;
    case OP_SUBL:
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update2_cc();
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        if (d != OR_TMP0)
            gen_op_mov_reg_T0(ot, d);
        else
            gen_op_st_T0_A0(ot + s1->mem_index);
        gen_op_update1_cc();
        s1->cc_op = CC_OP_LOGICB + ot;
        break;
    case OP_CMPL:
        gen_op_cmpl_T0_T1_cc();
        s1->cc_op = CC_OP_SUBB + ot;
        break;
    }
}
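
/* For ADC/SBB the resulting cc_op depends on the run-time carry, so it
   is computed dynamically: cpu_cc_op = CC_OP_ADDB/SUBB + ot + 4*carry.
   The CC_OP_* groups come in blocks of four sizes with the
   with-carry variant directly after the plain one, so adding 4 moves
   from the ADD/SUB block to the corresponding ADC/SBB block. */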

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, int ot, int d, int c)
{
    if (d != OR_TMP0)
        gen_op_mov_TN_reg(ot, 0, d);
    else
        gen_op_ld_T0_A0(ot + s1->mem_index);
    if (s1->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s1->cc_op);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
        s1->cc_op = CC_OP_INCB + ot;
    } else {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
        s1->cc_op = CC_OP_DECB + ot;
    }
    if (d != OR_TMP0)
        gen_op_mov_reg_T0(ot, d);
    else
        gen_op_st_T0_A0(ot + s1->mem_index);
    gen_compute_eflags_c(cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
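
/* INC/DEC leave CF untouched, so the carry of the *previous* operation
   is materialized into cc_src here (cpu_cc_op still holds the old
   cc_op at this point); the CC_OP_INC/DEC flag helpers can then read
   it back from cc_src when the remaining flags are computed. */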

static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask;
    int shift_label;
    TCGv t0, t1;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);
            tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags only if the shift count is non-zero */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* XXX: inefficient */
    t0 = tcg_temp_local_new(TCG_TYPE_TL);
    t1 = tcg_temp_local_new(TCG_TYPE_TL);

    tcg_gen_mov_tl(t0, cpu_T[0]);
    tcg_gen_mov_tl(t1, cpu_T3);

    shift_label = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right)
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    else
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);

    gen_set_label(shift_label);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
}
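
/* The extra shift by count-1 (through cpu_tmp5) keeps the last bit
   shifted out in cpu_T3; it becomes cc_src, from which the
   CC_OP_SHL/SAR flag helpers derive CF.  A shift count of 0 must leave
   the flags unchanged, hence the branch that skips the cc update
   entirely. */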

static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask;

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T[0]);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                gen_extu(ot, cpu_T[0]);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
        }
    }

    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags only if the shift count is non-zero */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        if (is_right)
            s->cc_op = CC_OP_SARB + ot;
        else
            s->cc_op = CC_OP_SHLB + ot;
    }
}

static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
{
    if (arg2 >= 0)
        tcg_gen_shli_tl(ret, arg1, arg2);
    else
        tcg_gen_shri_tl(ret, arg1, -arg2);
}

/* XXX: add faster immediate case */
static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
                          int is_right)
{
    target_ulong mask;
    int label1, label2, data_bits;
    TCGv t0, t1, t2, a0;

    /* XXX: inefficient, but we must use local temps */
    t0 = tcg_temp_local_new(TCG_TYPE_TL);
    t1 = tcg_temp_local_new(TCG_TYPE_TL);
    t2 = tcg_temp_local_new(TCG_TYPE_TL);
    a0 = tcg_temp_local_new(TCG_TYPE_TL);

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_mov_tl(t1, cpu_T[1]);

    tcg_gen_andi_tl(t1, t1, mask);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);

    if (ot <= OT_WORD)
        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
    else
        tcg_gen_mov_tl(cpu_tmp0, t1);

    gen_extu(ot, t0);
    tcg_gen_mov_tl(t2, t0);

    data_bits = 8 << ot;
    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
       fix TCG definition) */
    if (is_right) {
        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
    } else {
        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
        tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
    }
    tcg_gen_or_tl(t0, t0, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }

    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);

    gen_compute_eflags(cpu_cc_src);
    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
    if (is_right) {
        tcg_gen_shri_tl(t0, t0, data_bits - 1);
    }
    tcg_gen_andi_tl(t0, t0, CC_C);
    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);

    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);

    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}
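
/* The rotate itself is the classic (x >> n) | (x << (width - n))
   (mirrored for left rotates).  Only CF and OF are recomputed: CF from
   the bit rotated into the carry position, OF from the XOR of the old
   and new top bits (aligned to bit 11 by tcg_gen_lshift); that is why
   the full EFLAGS value is fetched, masked and merged here instead of
   switching to a specialized cc_op. */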

static void *helper_rotc[8] = {
    helper_rclb,
    helper_rclw,
    helper_rcll,
    X86_64_ONLY(helper_rclq),
    helper_rcrb,
    helper_rcrw,
    helper_rcrl,
    X86_64_ONLY(helper_rcrq),
};

/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
                           int is_right)
{
    int label1;

    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_TN_reg(ot, 0, op1);

    tcg_gen_helper_1_2(helper_rotc[ot + (is_right * 4)],
                       cpu_T[0], cpu_T[0], cpu_T[1]);
    /* store */
    if (op1 == OR_TMP0)
        gen_op_st_T0_A0(ot + s->mem_index);
    else
        gen_op_mov_reg_T0(ot, op1);

    /* update eflags */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);

    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
    tcg_gen_discard_tl(cpu_cc_dst);
    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);

    gen_set_label(label1);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}
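
/* RCL/RCR rotate through CF, i.e. a 9/17/33/65-bit rotate, so they are
   left to out-of-line helpers.  The helper reports the resulting flags
   back through cpu_cc_tmp, presumably leaving it at -1 when the rotate
   count was zero and the flags must stay unchanged, which is the
   marker tested above before committing the flag update. */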

/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
                                int is_right)
{
    int label1, label2, data_bits;
    target_ulong mask;
    TCGv t0, t1, t2, a0;

    t0 = tcg_temp_local_new(TCG_TYPE_TL);
    t1 = tcg_temp_local_new(TCG_TYPE_TL);
    t2 = tcg_temp_local_new(TCG_TYPE_TL);
    a0 = tcg_temp_local_new(TCG_TYPE_TL);

    if (ot == OT_QUAD)
        mask = 0x3f;
    else
        mask = 0x1f;

    /* load */
    if (op1 == OR_TMP0) {
        tcg_gen_mov_tl(a0, cpu_A0);
        gen_op_ld_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_v_reg(ot, t0, op1);
    }

    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);

    tcg_gen_mov_tl(t1, cpu_T[1]);
    tcg_gen_mov_tl(t2, cpu_T3);

    /* Must test zero case to avoid using undefined behaviour in TCG
       shifts. */
    label1 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);

    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
    if (ot == OT_WORD) {
        /* Note: we implement the Intel behaviour for shift count > 16 */
        if (is_right) {
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
            tcg_gen_or_tl(t0, t0, cpu_tmp0);
            tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);

            /* only needed if count > 16, but a test would complicate
               the code */
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);

            tcg_gen_shr_tl(t0, t0, t2);

            tcg_gen_or_tl(t0, t0, cpu_tmp0);
        } else {
            /* XXX: not optimal */
            tcg_gen_andi_tl(t0, t0, 0xffff);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t1, t1, t0);
            tcg_gen_ext32u_tl(t1, t1);

            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
            tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);

            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    } else {
        data_bits = 8 << ot;
        if (is_right) {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t0, t0);

            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);

            tcg_gen_shr_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);

        } else {
            if (ot == OT_LONG)
                tcg_gen_ext32u_tl(t1, t1);

            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);

            tcg_gen_shl_tl(t0, t0, t2);
            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
            tcg_gen_or_tl(t0, t0, t1);
        }
    }
    tcg_gen_mov_tl(t1, cpu_tmp4);

    gen_set_label(label1);
    /* store */
    if (op1 == OR_TMP0) {
        gen_op_st_v(ot + s->mem_index, t0, a0);
    } else {
        gen_op_mov_reg_v(ot, op1, t0);
    }

    /* update eflags */
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);

    label2 = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);

    tcg_gen_mov_tl(cpu_cc_src, t1);
    tcg_gen_mov_tl(cpu_cc_dst, t0);
    if (is_right) {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
    } else {
        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
    }
    gen_set_label(label2);
    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */

    tcg_temp_free(t0);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_temp_free(a0);
}
1833

    
1834
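/* dispatch a shift or rotate; the count is loaded into T1 from
   register 's' unless s == OR_TMP1 (count already in T1) */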
static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_TN_reg(ot, 1, s);
    switch(op) {
    case OP_ROL:
        gen_rot_rm_T1(s1, ot, d, 0);
        break;
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
        gen_rotc_rm_T1(s1, ot, d, 0);
        break;
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, 1);
        break;
    }
}

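/* same with an immediate count; only the plain shifts have a fast
   immediate path, rotates fall back to gen_shift */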
static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
{
    switch(op) {
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        gen_op_movl_T1_im(c);
        gen_shift(s1, op, ot, d, OR_TMP1);
        break;
    }
}

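/* decode the modrm addressing bytes and compute the effective address
   (including any segment base) into A0 */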
static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
{
    target_long disp;
    int havesib;
    int base;
    int index;
    int scale;
    int opreg;
    int mod, rm, code, override, must_add_seg;

    override = s->override;
    must_add_seg = s->addseg;
    if (override >= 0)
        must_add_seg = 1;
    mod = (modrm >> 6) & 3;
    rm = modrm & 7;

    if (s->aflag) {

        havesib = 0;
        base = rm;
        index = 0;
        scale = 0;

        if (base == 4) {
            havesib = 1;
            code = ldub_code(s->pc++);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            base = (code & 7);
        }
        base |= REX_B(s);

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                base = -1;
                disp = (int32_t)ldl_code(s->pc);
                s->pc += 4;
                if (CODE64(s) && !havesib) {
                    disp += s->pc + s->rip_offset;
                }
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = ldl_code(s->pc);
            s->pc += 4;
            break;
        }

        if (base >= 0) {
            /* for correct popl handling with esp */
            if (base == 4 && s->popl_esp_hack)
                disp += s->popl_esp_hack;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(base);
                if (disp != 0) {
                    gen_op_addq_A0_im(disp);
                }
            } else
#endif
            {
                gen_op_movl_A0_reg(base);
                if (disp != 0)
                    gen_op_addl_A0_im(disp);
            }
        } else {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_im(disp);
            } else
#endif
            {
                gen_op_movl_A0_im(disp);
            }
        }
        /* XXX: index == 4 is always invalid */
        if (havesib && (index != 4 || scale != 0)) {
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_reg_sN(scale, index);
            } else
#endif
            {
                gen_op_addl_A0_reg_sN(scale, index);
            }
        }
        if (must_add_seg) {
            if (override < 0) {
                if (base == R_EBP || base == R_ESP)
                    override = R_SS;
                else
                    override = R_DS;
            }
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_addq_A0_seg(override);
            } else
#endif
            {
                gen_op_addl_A0_seg(override);
            }
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                disp = lduw_code(s->pc);
                s->pc += 2;
                gen_op_movl_A0_im(disp);
                rm = 0; /* avoid SS override */
                goto no_rm;
            } else {
                disp = 0;
            }
            break;
        case 1:
            disp = (int8_t)ldub_code(s->pc++);
            break;
        default:
        case 2:
            disp = lduw_code(s->pc);
            s->pc += 2;
            break;
        }
        switch(rm) {
        case 0:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 1:
            gen_op_movl_A0_reg(R_EBX);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 2:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_ESI);
            break;
        case 3:
            gen_op_movl_A0_reg(R_EBP);
            gen_op_addl_A0_reg_sN(0, R_EDI);
            break;
        case 4:
            gen_op_movl_A0_reg(R_ESI);
            break;
        case 5:
            gen_op_movl_A0_reg(R_EDI);
            break;
        case 6:
            gen_op_movl_A0_reg(R_EBP);
            break;
        default:
        case 7:
            gen_op_movl_A0_reg(R_EBX);
            break;
        }
        if (disp != 0)
            gen_op_addl_A0_im(disp);
        gen_op_andl_A0_ffff();
    no_rm:
        if (must_add_seg) {
            if (override < 0) {
                if (rm == 2 || rm == 3 || rm == 6)
                    override = R_SS;
                else
                    override = R_DS;
            }
            gen_op_addl_A0_seg(override);
        }
    }

    opreg = OR_A0;
    disp = 0;
    *reg_ptr = opreg;
    *offset_ptr = disp;
}

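/* skip the addressing bytes of a modrm operand without generating any
   code (used for nop-like opcodes that only decode their operand) */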
static void gen_nop_modrm(DisasContext *s, int modrm)
{
    int mod, rm, base, code;

    mod = (modrm >> 6) & 3;
    if (mod == 3)
        return;
    rm = modrm & 7;

    if (s->aflag) {

        base = rm;

        if (base == 4) {
            code = ldub_code(s->pc++);
            base = (code & 7);
        }

        switch (mod) {
        case 0:
            if (base == 5) {
                s->pc += 4;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 4;
            break;
        }
    } else {
        switch (mod) {
        case 0:
            if (rm == 6) {
                s->pc += 2;
            }
            break;
        case 1:
            s->pc++;
            break;
        default:
        case 2:
            s->pc += 2;
            break;
        }
    }
}

/* used for LEA and MOV AX, mem */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    int override, must_add_seg;
    must_add_seg = s->addseg;
    override = R_DS;
    if (s->override >= 0) {
        override = s->override;
        must_add_seg = 1;
    } else {
        override = R_DS;
    }
    if (must_add_seg) {
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            gen_op_addq_A0_seg(override);
        } else
#endif
        {
            gen_op_addl_A0_seg(override);
        }
    }
}

/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
   OR_TMP0 */
static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
{
    int mod, rm, opreg, disp;

    mod = (modrm >> 6) & 3;
    rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    } else {
        gen_lea_modrm(s, modrm, &opreg, &disp);
        if (is_store) {
            if (reg != OR_TMP0)
                gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_st_T0_A0(ot + s->mem_index);
        } else {
            gen_op_ld_T0_A0(ot + s->mem_index);
            if (reg != OR_TMP0)
                gen_op_mov_reg_T0(ot, reg);
        }
    }
}

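/* fetch an immediate operand of size 'ot' from the instruction stream */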
static inline uint32_t insn_get(DisasContext *s, int ot)
{
    uint32_t ret;

    switch(ot) {
    case OT_BYTE:
        ret = ldub_code(s->pc);
        s->pc++;
        break;
    case OT_WORD:
        ret = lduw_code(s->pc);
        s->pc += 2;
        break;
    default:
    case OT_LONG:
        ret = ldl_code(s->pc);
        s->pc += 4;
        break;
    }
    return ret;
}

static inline int insn_const_size(unsigned int ot)
{
    if (ot <= OT_LONG)
        return 1 << ot;
    else
        return 4;
}

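/* jump to eip, using a direct TB link when the target stays on one of
   the pages already covered by the current TB */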
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    TranslationBlock *tb;
    target_ulong pc;

    pc = s->cs_base + eip;
    tb = s->tb;
    /* NOTE: we handle the case where the TB spans two pages here */
    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(eip);
        tcg_gen_exit_tb((long)tb + tb_num);
    } else {
        /* jump to another page: currently not optimized */
        gen_jmp_im(eip);
        gen_eob(s);
    }
}

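/* conditional jump: when direct block chaining is possible both
   outcomes exit through gen_goto_tb, otherwise eip is set and the
   block is ended generically */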
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    int l1, l2, cc_op;

    cc_op = s->cc_op;
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
        s->cc_op = CC_OP_DYNAMIC;
    }
    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
        s->is_jmp = 3;
    } else {

        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, cc_op, b, l1);

        gen_jmp_im(next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(val);
        gen_set_label(l2);
        gen_eob(s);
    }
}

static void gen_setcc(DisasContext *s, int b)
{
    int inv, jcc_op, l1;
    TCGv t0;

    if (is_fast_jcc_case(s, b)) {
        /* nominal case: we use a jump */
        /* XXX: make it faster by adding new instructions in TCG */
        t0 = tcg_temp_local_new(TCG_TYPE_TL);
        tcg_gen_movi_tl(t0, 0);
        l1 = gen_new_label();
        gen_jcc1(s, s->cc_op, b ^ 1, l1);
        tcg_gen_movi_tl(t0, 1);
        gen_set_label(l1);
        tcg_gen_mov_tl(cpu_T[0], t0);
        tcg_temp_free(t0);
    } else {
        /* slow case: it is more efficient not to generate a jump,
           although it is questionable whether this optimization is
           worth it */
        inv = b & 1;
        jcc_op = (b >> 1) & 7;
        gen_setcc_slow_T0(s, jcc_op);
        if (inv) {
            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
        }
    }
}

static inline void gen_op_movl_T0_seg(int seg_reg)
{
    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}

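/* real or vm86 mode segment load: no checks are needed, the base is
   simply selector << 4 */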
static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
    tcg_gen_st32_tl(cpu_T[0], cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
    tcg_gen_st_tl(cpu_T[0], cpu_env,
                  offsetof(CPUX86State,segs[seg_reg].base));
}

/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
{
    if (s->pe && !s->vm86) {
        /* XXX: optimize by finding processor state dynamically */
        if (s->cc_op != CC_OP_DYNAMIC)
            gen_op_set_cc_op(s->cc_op);
        gen_jmp_im(cur_eip);
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_helper_0_2(helper_load_seg, tcg_const_i32(seg_reg), cpu_tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
            s->is_jmp = 3;
    } else {
        gen_op_movl_seg_T0_vm(seg_reg);
        if (seg_reg == R_SS)
            s->is_jmp = 3;
    }
}

static inline int svm_is_rep(int prefixes)
{
    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
}

static inline void
gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
                              uint32_t type, uint64_t param)
{
    /* no SVM activated; fast case */
    if (likely(!(s->flags & HF_SVMI_MASK)))
        return;
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(pc_start - s->cs_base);
    tcg_gen_helper_0_2(helper_svm_check_intercept_param,
                       tcg_const_i32(type), tcg_const_i64(param));
}

static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}

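/* add 'addend' to ESP, using the current stack address size
   (64, 32 or 16 bits) */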
static inline void gen_stack_update(DisasContext *s, int addend)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_add_reg_im(2, R_ESP, addend);
    } else
#endif
    if (s->ss32) {
        gen_op_add_reg_im(1, R_ESP, addend);
    } else {
        gen_op_add_reg_im(0, R_ESP, addend);
    }
}

/* generate a push. It depends on ss32, addseg and dflag */
static void gen_push_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T0_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                tcg_gen_mov_tl(cpu_T[1], cpu_A0);
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            tcg_gen_mov_tl(cpu_T[1], cpu_A0);
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
    }
}

/* generate a push. It depends on ss32, addseg and dflag */
/* slower version for T1, only used for call Ev */
static void gen_push_T1(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        if (s->dflag) {
            gen_op_addq_A0_im(-8);
            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
        } else {
            gen_op_addq_A0_im(-2);
            gen_op_st_T1_A0(OT_WORD + s->mem_index);
        }
        gen_op_mov_reg_A0(2, R_ESP);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (!s->dflag)
            gen_op_addl_A0_im(-2);
        else
            gen_op_addl_A0_im(-4);
        if (s->ss32) {
            if (s->addseg) {
                gen_op_addl_A0_seg(R_SS);
            }
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);

        if (s->ss32 && !s->addseg)
            gen_op_mov_reg_A0(1, R_ESP);
        else
            gen_stack_update(s, (-2) << s->dflag);
    }
}

/* two step pop is necessary for precise exceptions */
static void gen_pop_T0(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        gen_op_movq_A0_reg(R_ESP);
        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
    } else
#endif
    {
        gen_op_movl_A0_reg(R_ESP);
        if (s->ss32) {
            if (s->addseg)
                gen_op_addl_A0_seg(R_SS);
        } else {
            gen_op_andl_A0_ffff();
            gen_op_addl_A0_seg(R_SS);
        }
        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
    }
}

static void gen_pop_update(DisasContext *s)
{
#ifdef TARGET_X86_64
    if (CODE64(s) && s->dflag) {
        gen_stack_update(s, 8);
    } else
#endif
    {
        gen_stack_update(s, 2 << s->dflag);
    }
}

static void gen_stack_A0(DisasContext *s)
{
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_pusha(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    gen_op_addl_A0_im(-16 <<  s->dflag);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0;i < 8; i++) {
        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
        gen_op_addl_A0_im(2 <<  s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

/* NOTE: wrap around in 16 bit not fully handled */
static void gen_popa(DisasContext *s)
{
    int i;
    gen_op_movl_A0_reg(R_ESP);
    if (!s->ss32)
        gen_op_andl_A0_ffff();
    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 <<  s->dflag);
    if (s->addseg)
        gen_op_addl_A0_seg(R_SS);
    for(i = 0;i < 8; i++) {
        /* ESP is not reloaded */
        if (i != 3) {
            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
        }
        gen_op_addl_A0_im(2 <<  s->dflag);
    }
    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
}

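/* ENTER: push EBP, let the helper copy the frame pointers of the
   nesting levels, then reserve the local variable area on the stack */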
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    int ot, opsize;

    level &= 0x1f;
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        ot = s->dflag ? OT_QUAD : OT_WORD;
        opsize = 1 << ot;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addq_A0_im(-opsize);
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);

        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            tcg_gen_helper_0_3(helper_enter64_level,
                               tcg_const_i32(level),
                               tcg_const_i32((ot == OT_QUAD)),
                               cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
    } else
#endif
    {
        ot = s->dflag + OT_WORD;
        opsize = 2 << s->dflag;

        gen_op_movl_A0_reg(R_ESP);
        gen_op_addl_A0_im(-opsize);
        if (!s->ss32)
            gen_op_andl_A0_ffff();
        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
        if (s->addseg)
            gen_op_addl_A0_seg(R_SS);
        /* push bp */
        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
        gen_op_st_T0_A0(ot + s->mem_index);
        if (level) {
            /* XXX: must save state */
            tcg_gen_helper_0_3(helper_enter_level,
                               tcg_const_i32(level),
                               tcg_const_i32(s->dflag),
                               cpu_T[1]);
        }
        gen_op_mov_reg_T1(ot, R_EBP);
        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_1(helper_raise_exception, tcg_const_i32(trapno));
    s->is_jmp = 3;
}

/* an interrupt is different from an exception because of the
   privilege checks */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_2(helper_raise_interrupt,
                       tcg_const_i32(intno),
                       tcg_const_i32(next_eip - cur_eip));
    s->is_jmp = 3;
}

static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    gen_jmp_im(cur_eip);
    tcg_gen_helper_0_0(helper_debug);
    s->is_jmp = 3;
}

/* generate a generic end of block. Trace exception is also generated
   if needed */
static void gen_eob(DisasContext *s)
{
    if (s->cc_op != CC_OP_DYNAMIC)
        gen_op_set_cc_op(s->cc_op);
    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
        tcg_gen_helper_0_0(helper_reset_inhibit_irq);
    }
    if (s->singlestep_enabled) {
        tcg_gen_helper_0_0(helper_debug);
    } else if (s->tf) {
        tcg_gen_helper_0_0(helper_single_step);
    } else {
        tcg_gen_exit_tb(0);
    }
    s->is_jmp = 3;
}

/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    if (s->jmp_opt) {
        if (s->cc_op != CC_OP_DYNAMIC) {
            gen_op_set_cc_op(s->cc_op);
            s->cc_op = CC_OP_DYNAMIC;
        }
        gen_goto_tb(s, tb_num, eip);
        s->is_jmp = 3;
    } else {
        gen_jmp_im(eip);
        gen_eob(s);
    }
}

static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}

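/* move 64 bit ('q') and 128 bit ('o') values between memory at A0 and
   a field of the CPU state; idx is the translator's mem_index */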
static inline void gen_ldq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
}

static inline void gen_stq_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
}

static inline void gen_ldo_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
}

static inline void gen_sto_env_A0(int idx, int offset)
{
    int mem_index = (idx >> 2) - 1;
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
    tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
}

static inline void gen_op_movo(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
}

static inline void gen_op_movq(int d_offset, int s_offset)
{
    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

static inline void gen_op_movl(int d_offset, int s_offset)
{
    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
}

static inline void gen_op_movq_env_0(int d_offset)
{
    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
}

#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

#define MMX_OP2(x) { helper_ ## x ## _mmx, helper_ ## x ## _xmm }
#define SSE_FOP(x) { helper_ ## x ## ps, helper_ ## x ## pd, \
                     helper_ ## x ## ss, helper_ ## x ## sd, }

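/* main MMX/SSE operation table: indexed by the opcode byte following
   0F and by the mandatory prefix (0 = none, 1 = 66, 2 = F3, 3 = F2) */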
static void *sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { helper_punpckldq_xmm, helper_punpcklqdq_xmm },
    [0x15] = { helper_punpckhdq_xmm, helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntps, movntpd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
    [0x2e] = { helper_ucomiss, helper_ucomisd },
    [0x2f] = { helper_comiss, helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { helper_rsqrtps, NULL, helper_rsqrtss, NULL },
    [0x53] = { helper_rcpps, NULL, helper_rcpss, NULL },
    [0x54] = { helper_pand_xmm, helper_pand_xmm }, /* andps, andpd */
    [0x55] = { helper_pandn_xmm, helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { helper_por_xmm, helper_por_xmm }, /* orps, orpd */
    [0x57] = { helper_pxor_xmm, helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { helper_cvtps2pd, helper_cvtpd2ps,
               helper_cvtss2sd, helper_cvtsd2ss },
    [0x5b] = { helper_cvtdq2ps, helper_cvtps2dq, helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { helper_shufps, helper_shufpd },

    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, helper_punpcklqdq_xmm },
    [0x6d] = { NULL, helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
    [0x70] = { helper_pshufw_mmx,
               helper_pshufd_xmm,
               helper_pshufhw_xmm,
               helper_pshuflw_xmm },
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x7c] = { NULL, helper_haddpd, NULL, helper_haddps },
    [0x7d] = { NULL, helper_hsubpd, NULL, helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, helper_addsubpd, NULL, helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, helper_cvttpd2dq, helper_cvtdq2pd, helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = MMX_OP2(maskmov),
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};

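/* shift-by-immediate group 0F 71/72/73: indexed by
   8 * (opcode - 0x71) + the modrm /reg field; the second index selects
   the MMX or XMM variant */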
static void *sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, helper_psrldq_xmm },
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, helper_pslldq_xmm },
};

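/* scalar conversions between integers and SSE floats: cvtsi2ss/sd,
   cvttss/sd2si and cvtss/sd2si rows, with 64 bit variants on x86_64 */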
static void *sse_op_table3[4 * 3] = {
    helper_cvtsi2ss,
    helper_cvtsi2sd,
    X86_64_ONLY(helper_cvtsq2ss),
    X86_64_ONLY(helper_cvtsq2sd),

    helper_cvttss2si,
    helper_cvttsd2si,
    X86_64_ONLY(helper_cvttss2sq),
    X86_64_ONLY(helper_cvttsd2sq),

    helper_cvtss2si,
    helper_cvtsd2si,
    X86_64_ONLY(helper_cvtss2sq),
    X86_64_ONLY(helper_cvtsd2sq),
};

static void *sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};

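/* 3DNow! operations, indexed by the opcode suffix byte */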
static void *sse_op_table5[256] = {
    [0x0c] = helper_pi2fw,
    [0x0d] = helper_pi2fd,
    [0x1c] = helper_pf2iw,
    [0x1d] = helper_pf2id,
    [0x8a] = helper_pfnacc,
    [0x8e] = helper_pfpnacc,
    [0x90] = helper_pfcmpge,
    [0x94] = helper_pfmin,
    [0x96] = helper_pfrcp,
    [0x97] = helper_pfrsqrt,
    [0x9a] = helper_pfsub,
    [0x9e] = helper_pfadd,
    [0xa0] = helper_pfcmpgt,
    [0xa4] = helper_pfmax,
    [0xa6] = helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = helper_movq, /* pfrsqit1 */
    [0xaa] = helper_pfsubr,
    [0xae] = helper_pfacc,
    [0xb0] = helper_pfcmpeq,
    [0xb4] = helper_pfmul,
    [0xb6] = helper_movq, /* pfrcpit2 */
    [0xb7] = helper_pmulhrw_mmx,
    [0xbb] = helper_pswapd,
    [0xbf] = helper_pavgb_mmx /* pavgusb */
};

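/* SSSE3/SSE4 helpers for the three byte opcode maps 0F 38
   (sse_op_table6) and 0F 3A (sse_op_table7); ext_mask is the CPUID
   feature bit required by each entry */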
struct sse_op_helper_s {
    void *op[2]; uint32_t ext_mask;
};
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
static struct sse_op_helper_s sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
};

static struct sse_op_helper_s sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
};

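/* translate one MMX/SSE/3DNow! instruction; b is the opcode byte
   following the 0F escape */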
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
3009
{
3010
    int b1, op1_offset, op2_offset, is_xmm, val, ot;
3011
    int modrm, mod, rm, reg, reg_addr, offset_addr;
3012
    void *sse_op2;
3013

    
3014
    b &= 0xff;
3015
    if (s->prefix & PREFIX_DATA)
3016
        b1 = 1;
3017
    else if (s->prefix & PREFIX_REPZ)
3018
        b1 = 2;
3019
    else if (s->prefix & PREFIX_REPNZ)
3020
        b1 = 3;
3021
    else
3022
        b1 = 0;
3023
    sse_op2 = sse_op_table1[b][b1];
3024
    if (!sse_op2)
3025
        goto illegal_op;
3026
    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3027
        is_xmm = 1;
3028
    } else {
3029
        if (b1 == 0) {
3030
            /* MMX case */
3031
            is_xmm = 0;
3032
        } else {
3033
            is_xmm = 1;
3034
        }
3035
    }
3036
    /* simple MMX/SSE operation */
3037
    if (s->flags & HF_TS_MASK) {
3038
        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3039
        return;
3040
    }
3041
    if (s->flags & HF_EM_MASK) {
3042
    illegal_op:
3043
        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
3044
        return;
3045
    }
3046
    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
3047
        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
3048
            goto illegal_op;
3049
    if (b == 0x0e) {
3050
        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3051
            goto illegal_op;
3052
        /* femms */
3053
        tcg_gen_helper_0_0(helper_emms);
3054
        return;
3055
    }
3056
    if (b == 0x77) {
3057
        /* emms */
3058
        tcg_gen_helper_0_0(helper_emms);
3059
        return;
3060
    }
3061
    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3062
       the static cpu state) */
3063
    if (!is_xmm) {
3064
        tcg_gen_helper_0_0(helper_enter_mmx);
3065
    }
3066

    
3067
    modrm = ldub_code(s->pc++);
3068
    reg = ((modrm >> 3) & 7);
3069
    if (is_xmm)
3070
        reg |= rex_r;
3071
    mod = (modrm >> 6) & 3;
3072
    if (sse_op2 == SSE_SPECIAL) {
3073
        b |= (b1 << 8);
3074
        switch(b) {
3075
        case 0x0e7: /* movntq */
3076
            if (mod == 3)
3077
                goto illegal_op;
3078
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3079
            gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3080
            break;
3081
        case 0x1e7: /* movntdq */
3082
        case 0x02b: /* movntps */
3083
        case 0x12b: /* movntps */
3084
        case 0x3f0: /* lddqu */
3085
            if (mod == 3)
3086
                goto illegal_op;
3087
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3088
            gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3089
            break;
3090
        case 0x6e: /* movd mm, ea */
3091
#ifdef TARGET_X86_64
3092
            if (s->dflag == 2) {
3093
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3094
                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3095
            } else
3096
#endif
3097
            {
3098
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3099
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3100
                                 offsetof(CPUX86State,fpregs[reg].mmx));
3101
                tcg_gen_helper_0_2(helper_movl_mm_T0_mmx, cpu_ptr0, cpu_T[0]);
3102
            }
3103
            break;
3104
        case 0x16e: /* movd xmm, ea */
3105
#ifdef TARGET_X86_64
3106
            if (s->dflag == 2) {
3107
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
3108
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3109
                                 offsetof(CPUX86State,xmm_regs[reg]));
3110
                tcg_gen_helper_0_2(helper_movq_mm_T0_xmm, cpu_ptr0, cpu_T[0]);
3111
            } else
3112
#endif
3113
            {
3114
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
3115
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3116
                                 offsetof(CPUX86State,xmm_regs[reg]));
3117
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3118
                tcg_gen_helper_0_2(helper_movl_mm_T0_xmm, cpu_ptr0, cpu_tmp2_i32);
3119
            }
3120
            break;
3121
        case 0x6f: /* movq mm, ea */
3122
            if (mod != 3) {
3123
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3124
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3125
            } else {
3126
                rm = (modrm & 7);
3127
                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3128
                               offsetof(CPUX86State,fpregs[rm].mmx));
3129
                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3130
                               offsetof(CPUX86State,fpregs[reg].mmx));
3131
            }
3132
            break;
3133
        case 0x010: /* movups */
3134
        case 0x110: /* movupd */
3135
        case 0x028: /* movaps */
3136
        case 0x128: /* movapd */
3137
        case 0x16f: /* movdqa xmm, ea */
3138
        case 0x26f: /* movdqu xmm, ea */
3139
            if (mod != 3) {
3140
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3141
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3142
            } else {
3143
                rm = (modrm & 7) | REX_B(s);
3144
                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3145
                            offsetof(CPUX86State,xmm_regs[rm]));
3146
            }
3147
            break;
3148
        case 0x210: /* movss xmm, ea */
3149
            if (mod != 3) {
3150
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3151
                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
3152
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3153
                gen_op_movl_T0_0();
3154
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3155
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3156
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3157
            } else {
3158
                rm = (modrm & 7) | REX_B(s);
3159
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3160
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3161
            }
3162
            break;
3163
        case 0x310: /* movsd xmm, ea */
3164
            if (mod != 3) {
3165
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3166
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3167
                gen_op_movl_T0_0();
3168
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3169
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3170
            } else {
3171
                rm = (modrm & 7) | REX_B(s);
3172
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3173
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3174
            }
3175
            break;
3176
        case 0x012: /* movlps */
3177
        case 0x112: /* movlpd */
3178
            if (mod != 3) {
3179
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3180
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3181
            } else {
3182
                /* movhlps */
3183
                rm = (modrm & 7) | REX_B(s);
3184
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3185
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3186
            }
3187
            break;
3188
        case 0x212: /* movsldup */
3189
            if (mod != 3) {
3190
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3191
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3192
            } else {
3193
                rm = (modrm & 7) | REX_B(s);
3194
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3195
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3196
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3197
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
3198
            }
3199
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3200
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3201
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3202
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3203
            break;
3204
        case 0x312: /* movddup */
3205
            if (mod != 3) {
3206
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3207
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3208
            } else {
3209
                rm = (modrm & 7) | REX_B(s);
3210
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3211
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3212
            }
3213
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3214
                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3215
            break;
3216
        case 0x016: /* movhps */
3217
        case 0x116: /* movhpd */
3218
            if (mod != 3) {
3219
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3220
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3221
            } else {
3222
                /* movlhps */
3223
                rm = (modrm & 7) | REX_B(s);
3224
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3225
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3226
            }
3227
            break;
3228
        case 0x216: /* movshdup */
3229
            if (mod != 3) {
3230
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3231
                gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
3232
            } else {
3233
                rm = (modrm & 7) | REX_B(s);
3234
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3235
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
3236
                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3237
                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
3238
            }
3239
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3240
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3241
            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3242
                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3243
            break;
3244
        case 0x7e: /* movd ea, mm */
3245
#ifdef TARGET_X86_64
3246
            if (s->dflag == 2) {
3247
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3248
                               offsetof(CPUX86State,fpregs[reg].mmx));
3249
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
3250
            } else
3251
#endif
3252
            {
3253
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3254
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3255
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
3256
            }
3257
            break;
3258
        case 0x17e: /* movd ea, xmm */
3259
#ifdef TARGET_X86_64
3260
            if (s->dflag == 2) {
3261
                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3262
                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3263
                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
3264
            } else
3265
#endif
3266
            {
3267
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3268
                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3269
                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
3270
            }
3271
            break;
3272
        case 0x27e: /* movq xmm, ea */
3273
            if (mod != 3) {
3274
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3275
                gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3276
            } else {
3277
                rm = (modrm & 7) | REX_B(s);
3278
                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3279
                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3280
            }
3281
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3282
            break;
3283
        case 0x7f: /* movq ea, mm */
3284
            if (mod != 3) {
3285
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
3286
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
3287
            } else {
3288
                rm = (modrm & 7);
3289
                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3290
                            offsetof(CPUX86State,fpregs[reg].mmx));
3291
            }
3292
            break;
3293
        case 0x011: /* movups */
        case 0x111: /* movupd */
        case 0x029: /* movaps */
        case 0x129: /* movapd */
        case 0x17f: /* movdqa ea, xmm */
        case 0x27f: /* movdqu ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
                            offsetof(CPUX86State,xmm_regs[reg]));
            }
            break;
        case 0x211: /* movss ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                gen_op_st_T0_A0(OT_LONG + s->mem_index);
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
            }
            break;
        case 0x311: /* movsd ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            }
            break;
        case 0x013: /* movlps */
        case 0x113: /* movlpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                goto illegal_op;
            }
            break;
        case 0x017: /* movhps */
        case 0x117: /* movhpd */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            } else {
                goto illegal_op;
            }
            break;
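        /* MMX/SSE shifts with an immediate count: the 8-bit immediate is
           spilled into mmx_t0/xmm_t0 so the same two-operand helpers as
           the register-count forms can be reused; the helper is selected
           from sse_op_table2 by opcode and modrm reg field.  */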
        case 0x71: /* shift mm, im */
        case 0x72:
        case 0x73:
        case 0x171: /* shift xmm, im */
        case 0x172:
        case 0x173:
            val = ldub_code(s->pc++);
            if (is_xmm) {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
                op1_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                gen_op_movl_T0_im(val);
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
                gen_op_movl_T0_0();
                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                op1_offset = offsetof(CPUX86State,mmx_t0);
            }
            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
            if (!sse_op2)
                goto illegal_op;
            if (is_xmm) {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        case 0x050: /* movmskps */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            tcg_gen_helper_1_1(helper_movmskps, cpu_tmp2_i32, cpu_ptr0);
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x150: /* movmskpd */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            tcg_gen_helper_1_1(helper_movmskpd, cpu_tmp2_i32, cpu_ptr0);
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
        case 0x02a: /* cvtpi2ps */
        case 0x12a: /* cvtpi2pd */
            tcg_gen_helper_0_0(helper_enter_mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            switch(b >> 8) {
            case 0x0:
                tcg_gen_helper_0_2(helper_cvtpi2ps, cpu_ptr0, cpu_ptr1);
                break;
            default:
            case 0x1:
                tcg_gen_helper_0_2(helper_cvtpi2pd, cpu_ptr0, cpu_ptr1);
                break;
            }
            break;
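        /* cvtsi2ss/cvtsi2sd: the source is a 32-bit GPR, or a 64-bit one
           with REX.W; sse_op_table3 is indexed by the 64-bit flag and by
           the ss/sd distinction (b >> 8) to pick the matching helper.  */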
        case 0x22a: /* cvtsi2ss */
        case 0x32a: /* cvtsi2sd */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
            if (ot == OT_LONG) {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_tmp2_i32);
            } else {
                tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_T[0]);
            }
            break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            tcg_gen_helper_0_0(helper_enter_mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            switch(b) {
            case 0x02c:
                tcg_gen_helper_0_2(helper_cvttps2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x12c:
                tcg_gen_helper_0_2(helper_cvttpd2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x02d:
                tcg_gen_helper_0_2(helper_cvtps2pi, cpu_ptr0, cpu_ptr1);
                break;
            case 0x12d:
                tcg_gen_helper_0_2(helper_cvtpd2pi, cpu_ptr0, cpu_ptr1);
                break;
            }
            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                } else {
                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
                                    (b & 1) * 4];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
            if (ot == OT_LONG) {
                tcg_gen_helper_1_1(sse_op2, cpu_tmp2_i32, cpu_ptr0);
                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            } else {
                tcg_gen_helper_1_1(sse_op2, cpu_T[0], cpu_ptr0);
            }
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            s->rip_offset = 1;
            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
            val = ldub_code(s->pc++);
            if (b1) {
                val &= 7;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
            } else {
                val &= 3;
                tcg_gen_st16_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            if (mod != 3)
                goto illegal_op;
            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            val = ldub_code(s->pc++);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0x1d6: /* movq ea, xmm */
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            tcg_gen_helper_0_0(helper_enter_mmx);
            rm = (modrm & 7);
            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            tcg_gen_helper_0_0(helper_enter_mmx);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
                tcg_gen_helper_1_1(helper_pmovmskb_xmm, cpu_tmp2_i32, cpu_ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
                tcg_gen_helper_1_1(helper_pmovmskb_mmx, cpu_tmp2_i32, cpu_ptr0);
            }
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_T0(OT_LONG, reg);
            break;
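        /* Three-byte opcodes 0f 38 xx (SSSE3/SSE4.1): the second opcode
           byte selects the helper via sse_op_table6; a 66 prefix (b1)
           selects the XMM form, otherwise the MMX form.  For memory
           operands only as many bytes as the insn consumes are loaded
           (pmovsx/pmovzx read 2, 4 or 8 bytes, not a full vector).  */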
        case 0x038:
        case 0x138:
            b = modrm;
            modrm = ldub_code(s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;

            if (s->prefix & PREFIX_REPNZ)
                goto crc32;

            sse_op2 = sse_op_table6[b].op[b1];
            if (!sse_op2)
                goto illegal_op;
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        gen_ldq_env_A0(s->mem_index, op2_offset +
                                        offsetof(XMMReg, XMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
                                          (s->mem_index >> 2) - 1);
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                        offsetof(XMMReg, XMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
                                          (s->mem_index >> 2) - 1);
                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                        offsetof(XMMReg, XMM_W(0)));
                        break;
                    case 0x2a:            /* movntdqa */
                        gen_ldo_env_A0(s->mem_index, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s->mem_index, op2_offset);
                    }
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldq_env_A0(s->mem_index, op2_offset);
                }
            }
            if (sse_op2 == SSE_SPECIAL)
                goto illegal_op;

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);

            if (b == 0x17)
                s->cc_op = CC_OP_EFLAGS;
            break;
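        /* crc32 (f2 0f 38 f0/f1): accumulate the CRC of the 8/16/32/64-bit
           source operand into the 32-bit value taken from the destination
           register; the operand size follows the usual 66/REX.W rules.  */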
        case 0x338: /* crc32 */
        crc32:
            b = modrm;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;

            if (b != 0xf0 && b != 0xf1)
                goto illegal_op;
            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
                goto illegal_op;

            if (b == 0xf0)
                ot = OT_BYTE;
            else if (b == 0xf1 && s->dflag != 2)
                if (s->prefix & PREFIX_DATA)
                    ot = OT_WORD;
                else
                    ot = OT_LONG;
            else
                ot = OT_QUAD;

            gen_op_mov_TN_reg(OT_LONG, 0, reg);
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
            tcg_gen_helper_1_3(helper_crc32, cpu_T[0], cpu_tmp2_i32,
                            cpu_T[0], tcg_const_i32(8 << ot));

            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
            gen_op_mov_reg_T0(ot, reg);
            break;
        case 0x03a:
        case 0x13a:
            b = modrm;
            modrm = ldub_code(s->pc++);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;

            sse_op2 = sse_op_table7[b].op[b1];
            if (!sse_op2)
                goto illegal_op;
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                goto illegal_op;

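            /* SSE_SPECIAL marks the 0f 3a insns handled inline below:
               pextrb/w/d/q and extractps store one element to a GPR or
               to memory, pinsrb/d/q and insertps insert one; the
               immediate selects the element (and for insertps also the
               source position and zero mask).  */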
            if (sse_op2 == SSE_SPECIAL) {
                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3)
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                reg = ((modrm >> 3) & 7) | rex_r;
                val = ldub_code(s->pc++);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_B(val & 15)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_W(val & 7)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x16:
                    if (ot == OT_LONG) { /* pextrd */
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(val & 3)));
                        if (mod == 3)
                            gen_op_mov_reg_v(ot, rm, cpu_tmp2_i32);
                        else
                            tcg_gen_qemu_st32(cpu_tmp2_i32, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                    } else { /* pextrq */
                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_Q(val & 1)));
                        if (mod == 3)
                            gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
                        else
                            tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_L(val & 3)));
                    if (mod == 3)
                        gen_op_mov_reg_T0(ot, rm);
                    else
                        tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3)
                        gen_op_mov_TN_reg(OT_LONG, 0, rm);
                    else
                        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    if (mod == 3)
                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .XMM_L((val >> 6) & 3)));
                    else
                        tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
                                        (s->mem_index >> 2) - 1);
                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .XMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(0)));
                    if ((val >> 1) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(1)));
                    if ((val >> 2) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(2)));
                    if ((val >> 3) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(3)));
                    break;
                case 0x22:
                    if (ot == OT_LONG) { /* pinsrd */
                        if (mod == 3)
                            gen_op_mov_v_reg(ot, cpu_tmp2_i32, rm);
                        else
                            tcg_gen_qemu_ld32u(cpu_tmp2_i32, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_L(val & 3)));
                    } else { /* pinsrq */
                        if (mod == 3)
                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                        else
                            tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                            (s->mem_index >> 2) - 1);
                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].XMM_Q(val & 1)));
                    }
                    break;
                }
                return;
            }

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldo_env_A0(s->mem_index, op2_offset);
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_ldq_env_A0(s->mem_index, op2_offset);
                }
            }
            val = ldub_code(s->pc++);

            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                s->cc_op = CC_OP_EFLAGS;

                if (s->dflag == 2)
                    /* The helper must use entire 64-bit gp registers */
                    val |= 1 << 8;
            }

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
        default:
            goto illegal_op;
        }
    } else {
        /* generic MMX or SSE operation */
        switch(b) {
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
        case 0xc2: /* compare insns */
            s->rip_offset = 1;
            break;
        default:
            break;
        }
        if (is_xmm) {
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                b == 0xc2)) {
                    /* specific case for SSE single instructions */
                    if (b1 == 2) {
                        /* 32 bit access */
                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                        tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                    } else {
                        /* 64 bit access */
                        gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
                    }
                } else {
                    gen_ldo_env_A0(s->mem_index, op2_offset);
                }
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
        } else {
            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
            if (mod != 3) {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s->mem_index, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
        }
        switch(b) {
        case 0x0f: /* 3DNow! data insns */
            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
                goto illegal_op;
            val = ldub_code(s->pc++);
            sse_op2 = sse_op_table5[val];
            if (!sse_op2)
                goto illegal_op;
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        case 0x70: /* pshufx insn */
        case 0xc6: /* pshufx insn */
            val = ldub_code(s->pc++);
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
            break;
        case 0xc2:
            /* compare insns */
            val = ldub_code(s->pc++);
            if (val >= 8)
                goto illegal_op;
            sse_op2 = sse_op_table4[val][b1];
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        case 0xf7:
            /* maskmov : we must prepare A0 */
            if (mod != 3)
                goto illegal_op;
#ifdef TARGET_X86_64
            if (s->aflag == 2) {
                gen_op_movq_A0_reg(R_EDI);
            } else
#endif
            {
                gen_op_movl_A0_reg(R_EDI);
                if (s->aflag == 0)
                    gen_op_andl_A0_ffff();
            }
            gen_add_A0_ds_seg(s);

            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, cpu_A0);
            break;
        default:
            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
            tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
            break;
        }
        if (b == 0x2e || b == 0x2f) {
            s->cc_op = CC_OP_EFLAGS;
        }
    }
}

/* convert one instruction. s->is_jmp is set if the translation must
   be stopped. Return the next pc value */
static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
{
    int b, prefixes, aflag, dflag;
    int shift, ot;
    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
    target_ulong next_eip, tval;
    int rex_w, rex_r;

    if (unlikely(loglevel & CPU_LOG_TB_OP))
        tcg_gen_debug_insn_start(pc_start);
    s->pc = pc_start;
    prefixes = 0;
    aflag = s->code32;
    dflag = s->code32;
    s->override = -1;
    rex_w = -1;
    rex_r = 0;
#ifdef TARGET_X86_64
    s->rex_x = 0;
    s->rex_b = 0;
    x86_64_hregs = 0;
#endif
    s->rip_offset = 0; /* for relative ip address */
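    /* Consume prefix bytes one at a time by looping back to next_byte;
       in 64-bit mode a REX prefix (0x40-0x4f) additionally supplies the
       rex.w/r/x/b fields used for operand size and register numbering
       below.  */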
 next_byte:
    b = ldub_code(s->pc);
    s->pc++;
    /* check prefixes */
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        case 0x40 ... 0x4f:
            /* REX prefix */
            rex_w = (b >> 3) & 1;
            rex_r = (b & 0x4) << 1;
            s->rex_x = (b & 0x2) << 2;
            REX_B(s) = (b & 0x1) << 3;
            x86_64_hregs = 1; /* select uniform byte register addressing */
            goto next_byte;
        }
        if (rex_w == 1) {
            /* 0x66 is ignored if rex.w is set */
            dflag = 2;
        } else {
            if (prefixes & PREFIX_DATA)
                dflag ^= 1;
        }
        if (!(prefixes & PREFIX_ADR))
            aflag = 2;
    } else
#endif
    {
        switch (b) {
        case 0xf3:
            prefixes |= PREFIX_REPZ;
            goto next_byte;
        case 0xf2:
            prefixes |= PREFIX_REPNZ;
            goto next_byte;
        case 0xf0:
            prefixes |= PREFIX_LOCK;
            goto next_byte;
        case 0x2e:
            s->override = R_CS;
            goto next_byte;
        case 0x36:
            s->override = R_SS;
            goto next_byte;
        case 0x3e:
            s->override = R_DS;
            goto next_byte;
        case 0x26:
            s->override = R_ES;
            goto next_byte;
        case 0x64:
            s->override = R_FS;
            goto next_byte;
        case 0x65:
            s->override = R_GS;
            goto next_byte;
        case 0x66:
            prefixes |= PREFIX_DATA;
            goto next_byte;
        case 0x67:
            prefixes |= PREFIX_ADR;
            goto next_byte;
        }
        if (prefixes & PREFIX_DATA)
            dflag ^= 1;
        if (prefixes & PREFIX_ADR)
            aflag ^= 1;
    }

    s->prefix = prefixes;
    s->aflag = aflag;
    s->dflag = dflag;

    /* lock generation */
    if (prefixes & PREFIX_LOCK)
        tcg_gen_helper_0_0(helper_lock);

    /* now check op code */
 reswitch:
    switch(b) {
    case 0x0f:
        /**************************/
        /* extended op code */
        b = ldub_code(s->pc++) | 0x100;
        goto reswitch;

        /**************************/
        /* arith & logic */
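        /* Opcodes 0x00-0x3d encode the eight ALU ops: bits 5:3 select
           add/or/adc/sbb/and/sub/xor/cmp, bit 0 selects byte vs
           word/long size, and bits 2:1 pick the form (Ev,Gv / Gv,Ev /
           AL-AX-EAX with immediate).  */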
    case 0x00 ... 0x05:
    case 0x08 ... 0x0d:
    case 0x10 ... 0x15:
    case 0x18 ... 0x1d:
    case 0x20 ... 0x25:
    case 0x28 ... 0x2d:
    case 0x30 ... 0x35:
    case 0x38 ... 0x3d:
        {
            int op, f, val;
            op = (b >> 3) & 7;
            f = (b >> 1) & 3;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            switch(f) {
            case 0: /* OP Ev, Gv */
                modrm = ldub_code(s->pc++);
                reg = ((modrm >> 3) & 7) | rex_r;
                mod = (modrm >> 6) & 3;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    opreg = OR_TMP0;
                } else if (op == OP_XORL && rm == reg) {
                xor_zero:
                    /* xor reg, reg optimisation */
                    gen_op_movl_T0_0();
                    s->cc_op = CC_OP_LOGICB + ot;
                    gen_op_mov_reg_T0(ot, reg);
                    gen_op_update1_cc();
                    break;
                } else {
                    opreg = rm;
                }
                gen_op_mov_TN_reg(ot, 1, reg);
                gen_op(s, op, ot, opreg);
                break;
            case 1: /* OP Gv, Ev */
                modrm = ldub_code(s->pc++);
                mod = (modrm >> 6) & 3;
                reg = ((modrm >> 3) & 7) | rex_r;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                    gen_op_ld_T1_A0(ot + s->mem_index);
                } else if (op == OP_XORL && rm == reg) {
                    goto xor_zero;
                } else {
                    gen_op_mov_TN_reg(ot, 1, rm);
                }
                gen_op(s, op, ot, reg);
                break;
            case 2: /* OP A, Iv */
                val = insn_get(s, ot);
                gen_op_movl_T1_im(val);
                gen_op(s, op, ot, OR_EAX);
                break;
            }
        }
        break;

    case 0x82:
        if (CODE64(s))
            goto illegal_op;
    case 0x80: /* GRP1 */
    case 0x81:
    case 0x83:
        {
            int val;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;

            modrm = ldub_code(s->pc++);
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (b == 0x83)
                    s->rip_offset = 1;
                else
                    s->rip_offset = insn_const_size(ot);
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                opreg = OR_TMP0;
            } else {
                opreg = rm;
            }

            switch(b) {
            default:
            case 0x80:
            case 0x81:
            case 0x82:
                val = insn_get(s, ot);
                break;
            case 0x83:
                val = (int8_t)insn_get(s, OT_BYTE);
                break;
            }
            gen_op_movl_T1_im(val);
            gen_op(s, op, ot, opreg);
        }
        break;

        /**************************/
        /* inc, dec, and other misc arith */
    case 0x40 ... 0x47: /* inc Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), 1);
        break;
    case 0x48 ... 0x4f: /* dec Gv */
        ot = dflag ? OT_LONG : OT_WORD;
        gen_inc(s, ot, OR_EAX + (b & 7), -1);
        break;
    case 0xf6: /* GRP3 */
    case 0xf7:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (mod != 3) {
            if (op == 0)
                s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

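        /* GRP3: the reg field of the modrm byte selects the operation
           (0=test, 2=not, 3=neg, 4=mul, 5=imul, 6=div, 7=idiv), all
           sharing the operand loaded into T0 above.  */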
        switch(op) {
        case 0: /* test */
            val = insn_get(s, ot);
            gen_op_movl_T1_im(val);
            gen_op_testl_T0_T1_cc();
            s->cc_op = CC_OP_LOGICB + ot;
            break;
        case 2: /* not */
            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            break;
        case 3: /* neg */
            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
            if (mod != 3) {
                gen_op_st_T0_A0(ot + s->mem_index);
            } else {
                gen_op_mov_reg_T0(ot, rm);
            }
            gen_op_update_neg_cc();
            s->cc_op = CC_OP_SUBB + ot;
            break;
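        /* mul: unsigned widening multiply of the accumulator by T0; the
           low half goes back to A and the high half to the D register
           (AH for the byte form).  cc_src receives the high part, so
           CF/OF can later be derived from it being non-zero.  */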
        case 4: /* mul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
#else
                {
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
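        /* imul (one-operand form): signed widening multiply; cc_src is
           set to the difference between the result and the sign
           extension of its low half, i.e. non-zero exactly when the
           product does not fit the operand size, which yields CF/OF.  */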
        case 5: /* imul */
            switch(ot) {
            case OT_BYTE:
                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                s->cc_op = CC_OP_MULB;
                break;
            case OT_WORD:
                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_WORD, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                s->cc_op = CC_OP_MULW;
                break;
            default:
            case OT_LONG:
#ifdef TARGET_X86_64
                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                gen_op_mov_reg_T0(OT_LONG, R_EAX);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
                gen_op_mov_reg_T0(OT_LONG, R_EDX);
#else
                {
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                }
#endif
                s->cc_op = CC_OP_MULL;
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
                s->cc_op = CC_OP_MULQ;
                break;
#endif
            }
            break;
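        /* div/idiv are done in helpers since they may raise #DE; the
           current eip is flushed first so the exception can report a
           precise instruction address.  */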
        case 6: /* div */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divb_AL, cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divw_AX, cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divl_EAX, cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_divq_EAX, cpu_T[0]);
                break;
#endif
            }
            break;
        case 7: /* idiv */
            switch(ot) {
            case OT_BYTE:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivb_AL, cpu_T[0]);
                break;
            case OT_WORD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivw_AX, cpu_T[0]);
                break;
            default:
            case OT_LONG:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivl_EAX, cpu_T[0]);
                break;
#ifdef TARGET_X86_64
            case OT_QUAD:
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_helper_0_1(helper_idivq_EAX, cpu_T[0]);
                break;
#endif
            }
            break;
        default:
            goto illegal_op;
        }
        break;

    case 0xfe: /* GRP4 */
    case 0xff: /* GRP5 */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (op >= 2 && b == 0xfe) {
            goto illegal_op;
        }
        if (CODE64(s)) {
            if (op == 2 || op == 4) {
                /* operand size for jumps is 64 bit */
                ot = OT_QUAD;
            } else if (op == 3 || op == 5) {
                /* for far calls/jumps, the operand is 16 or 32 bit, even
                   in long mode */
                ot = dflag ? OT_LONG : OT_WORD;
            } else if (op == 6) {
                /* default push size is 64 bit */
                ot = dflag ? OT_QUAD : OT_WORD;
            }
        }
        if (mod != 3) {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            if (op >= 2 && op != 3 && op != 5)
                gen_op_ld_T0_A0(ot + s->mem_index);
        } else {
            gen_op_mov_TN_reg(ot, 0, rm);
        }

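        /* GRP5 subops: 0=inc, 1=dec, 2=call, 3=lcall, 4=jmp, 5=ljmp,
           6=push.  The far forms load the new cs:eip pair themselves,
           which is why no operand was fetched for them above.  */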
        switch(op) {
        case 0: /* inc Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, 1);
            break;
        case 1: /* dec Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, -1);
            break;
        case 2: /* call Ev */
            /* XXX: optimize if memory (no 'and' is necessary) */
            if (s->dflag == 0)
                gen_op_andl_T0_ffff();
            next_eip = s->pc - s->cs_base;
            gen_movtl_T1_im(next_eip);
            gen_push_T1(s);
            gen_op_jmp_T0();
            gen_eob(s);
            break;
        case 3: /* lcall Ev */
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        do_lcall:
            if (s->pe && !s->vm86) {
                if (s->cc_op != CC_OP_DYNAMIC)
                    gen_op_set_cc_op(s->cc_op);
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_4(helper_lcall_protected,
                                   cpu_tmp2_i32, cpu_T[1],
                                   tcg_const_i32(dflag),
                                   tcg_const_i32(s->pc - pc_start));
            } else {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_4(helper_lcall_real,
                                   cpu_tmp2_i32, cpu_T[1],
                                   tcg_const_i32(dflag),
                                   tcg_const_i32(s->pc - s->cs_base));
            }
            gen_eob(s);
            break;
        case 4: /* jmp Ev */
            if (s->dflag == 0)
                gen_op_andl_T0_ffff();
            gen_op_jmp_T0();
            gen_eob(s);
            break;
        case 5: /* ljmp Ev */
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
        do_ljmp:
            if (s->pe && !s->vm86) {
                if (s->cc_op != CC_OP_DYNAMIC)
                    gen_op_set_cc_op(s->cc_op);
                gen_jmp_im(pc_start - s->cs_base);
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                tcg_gen_helper_0_3(helper_ljmp_protected,
                                   cpu_tmp2_i32,
                                   cpu_T[1],
                                   tcg_const_i32(s->pc - pc_start));
            } else {
                gen_op_movl_seg_T0_vm(R_CS);
                gen_op_movl_T0_T1();
                gen_op_jmp_T0();
            }
            gen_eob(s);
            break;
        case 6: /* push Ev */
            gen_push_T0(s);
            break;
        default:
            goto illegal_op;
        }
        break;

    case 0x84: /* test Ev, Gv */
    case 0x85:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;

        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_TN_reg(ot, 1, reg);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        val = insn_get(s, ot);

        gen_op_mov_TN_reg(ot, 0, OR_EAX);
        gen_op_movl_T1_im(val);
        gen_op_testl_T0_T1_cc();
        s->cc_op = CC_OP_LOGICB + ot;
        break;

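    /* Sign-extension ops: with a 64-bit operand size these become
       CDQE/CQO; dflag selects between the 16, 32 and 64-bit forms.  */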
    case 0x98: /* CWDE/CBW */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_QUAD, R_EAX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_LONG, R_EAX);
        } else {
            gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
            gen_op_mov_reg_T0(OT_WORD, R_EAX);
        }
        break;
    case 0x99: /* CDQ/CWD */
#ifdef TARGET_X86_64
        if (dflag == 2) {
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
            gen_op_mov_reg_T0(OT_QUAD, R_EDX);
        } else
#endif
        if (dflag == 1) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
            gen_op_mov_reg_T0(OT_LONG, R_EDX);
        } else {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
            gen_op_mov_reg_T0(OT_WORD, R_EDX);
        }
        break;
    case 0x1af: /* imul Gv, Ev */
4634
    case 0x69: /* imul Gv, Ev, I */
4635
    case 0x6b:
4636
        ot = dflag + OT_WORD;
4637
        modrm = ldub_code(s->pc++);
4638
        reg = ((modrm >> 3) & 7) | rex_r;
4639
        if (b == 0x69)
4640
            s->rip_offset = insn_const_size(ot);
4641
        else if (b == 0x6b)
4642
            s->rip_offset = 1;
4643
        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
4644
        if (b == 0x69) {
4645
            val = insn_get(s, ot);
4646
            gen_op_movl_T1_im(val);
4647
        } else if (b == 0x6b) {
4648
            val = (int8_t)insn_get(s, OT_BYTE);
4649
            gen_op_movl_T1_im(val);
4650
        } else {
4651
            gen_op_mov_TN_reg(ot, 1, reg);
4652
        }
4653

    
4654
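        /* The flag state is saved lazily: cc_dst holds the truncated
           product, and cc_src is made non-zero exactly when the signed
           result does not fit in the destination size, which the deferred
           CC_OP_MUL* computation turns into CF/OF. */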
#ifdef TARGET_X86_64
        if (ot == OT_QUAD) {
            tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
        } else
#endif
        if (ot == OT_LONG) {
#ifdef TARGET_X86_64
                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
#else
                {
                    TCGv t0, t1;
                    t0 = tcg_temp_new(TCG_TYPE_I64);
                    t1 = tcg_temp_new(TCG_TYPE_I64);
                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
                    tcg_gen_mul_i64(t0, t0, t1);
                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
                    tcg_gen_shri_i64(t0, t0, 32);
                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
                }
#endif
        } else {
            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
        }
        gen_op_mov_reg_T0(ot, reg);
        s->cc_op = CC_OP_MULB + ot;
        break;
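    /* XADD: the destination receives src + dest while the source register
       receives the original destination value; gen_op_update2_cc keeps one
       addend in cc_src and the sum in cc_dst for the deferred ADD flag
       computation. */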
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        if ((b & 1) == 0)
            ot = OT_BYTE;
        else
            ot = dflag + OT_WORD;
        modrm = ldub_code(s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_mov_TN_reg(ot, 1, rm);
            gen_op_addl_T0_T1();
            gen_op_mov_reg_T1(ot, reg);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            gen_op_mov_TN_reg(ot, 0, reg);
            gen_op_ld_T1_A0(ot + s->mem_index);
            gen_op_addl_T0_T1();
            gen_op_st_T0_A0(ot + s->mem_index);
            gen_op_mov_reg_T1(ot, reg);
        }
        gen_op_update2_cc();
        s->cc_op = CC_OP_ADDB + ot;
        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        {
            int label1, label2;
            TCGv t0, t1, t2, a0;

            if ((b & 1) == 0)
                ot = OT_BYTE;
            else
                ot = dflag + OT_WORD;
            modrm = ldub_code(s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            t0 = tcg_temp_local_new(TCG_TYPE_TL);
            t1 = tcg_temp_local_new(TCG_TYPE_TL);
            t2 = tcg_temp_local_new(TCG_TYPE_TL);
            a0 = tcg_temp_local_new(TCG_TYPE_TL);
            gen_op_mov_v_reg(ot, t1, reg);
            if (mod == 3) {
                rm = (modrm & 7) | REX_B(s);
                gen_op_mov_v_reg(ot, t0, rm);
            } else {
                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                tcg_gen_mov_tl(a0, cpu_A0);
                gen_op_ld_v(ot + s->mem_index, t0, a0);
                rm = 0; /* avoid warning */
            }
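            /* Compare EAX against the destination by subtracting: after
               zero extension to the operand size, t2 == 0 means equal.
               The difference is also reused below as cc_dst for the
               deferred SUB flag computation. */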
            label1 = gen_new_label();
            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
            tcg_gen_sub_tl(t2, t2, t0);
            gen_extu(ot, t2);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
            if (mod == 3) {
                label2 = gen_new_label();
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                gen_op_mov_reg_v(ot, rm, t1);
                gen_set_label(label2);
            } else {
                tcg_gen_mov_tl(t1, t0);
                gen_op_mov_reg_v(ot, R_EAX, t0);
                gen_set_label(label1);
                /* always store */
                gen_op_st_v(ot + s->mem_index, t1, a0);
            }
            tcg_gen_mov_tl(cpu_cc_src, t0);
            tcg_gen_mov_tl(cpu_cc_dst, t2);
            s->cc_op = CC_OP_SUBB + ot;
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
            tcg_temp_free(a0);
        }
        break;
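    /* CMPXCHG8B/16B are performed entirely in a helper, including the ZF
       update, so the condition codes are flushed before the call and taken
       from EFLAGS afterwards. */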
    case 0x1c7: /* cmpxchg8b */
        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
            goto illegal_op;
#ifdef TARGET_X86_64
        if (dflag == 2) {
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                goto illegal_op;
            gen_jmp_im(pc_start - s->cs_base);
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            tcg_gen_helper_0_1(helper_cmpxchg16b, cpu_A0);
        } else
#endif
        {
            if (!(s->cpuid_features & CPUID_CX8))
                goto illegal_op;
            gen_jmp_im(pc_start - s->cs_base);
            if (s->cc_op != CC_OP_DYNAMIC)
                gen_op_set_cc_op(s->cc_op);
            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
            tcg_gen_helper_0_1(helper_cmpxchg8b, cpu_A0);
        }
        s->cc_op = CC_OP_EFLAGS;
        break;

        /**************************/
        /* push/pop */
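    /* In 64 bit mode pushes and pops default to 64 bit operands: only a
       0x66 prefix (dflag == 0) selects 16 bit ones, hence OT_QUAD unless
       dflag is zero. */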
    case 0x50 ... 0x57: /* push */
        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
        gen_push_T0(s);
        break;
    case 0x58 ... 0x5f: /* pop */
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        gen_pop_T0(s);
        /* NOTE: order is important for pop %sp */
        gen_pop_update(s);
        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
        break;
    case 0x60: /* pusha */
        if (CODE64(s))
            goto illegal_op;
        gen_pusha(s);
        break;
    case 0x61: /* popa */
        if (CODE64(s))
            goto illegal_op;
        gen_popa(s);
        break;
    case 0x68: /* push Iv */
    case 0x6a:
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        if (b == 0x68)
            val = insn_get(s, ot);
        else
            val = (int8_t)insn_get(s, OT_BYTE);
        gen_op_movl_T0_im(val);
        gen_push_T0(s);
        break;
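    /* For a pop into a memory operand, the effective address must be
       computed with the already incremented stack pointer; popl_esp_hack
       accounts for this when ESP is used as a base register. */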
    case 0x8f: /* pop Ev */
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        modrm = ldub_code(s->pc++);
        mod = (modrm >> 6) & 3;
        gen_pop_T0(s);
        if (mod == 3) {
            /* NOTE: order is important for pop %sp */
            gen_pop_update(s);
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_reg_T0(ot, rm);
        } else {
            /* NOTE: order is important too for MMU exceptions */
            s->popl_esp_hack = 1 << ot;
            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
            s->popl_esp_hack = 0;
            gen_pop_update(s);
        }
        break;
    case 0xc8: /* enter */
        {
            int level;
            val = lduw_code(s->pc);
            s->pc += 2;
            level = ldub_code(s->pc++);
            gen_enter(s, val, level);
        }
        break;
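    /* LEAVE is ESP <- EBP followed by EBP <- pop: the ESP move uses the
       stack address size (CODE64/ss32) while the pop uses the operand
       size. */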
    case 0xc9: /* leave */
        /* XXX: exception not precise (ESP is updated before potential exception) */
        if (CODE64(s)) {
            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
        } else if (s->ss32) {
            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
            gen_op_mov_reg_T0(OT_LONG, R_ESP);
        } else {
            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
            gen_op_mov_reg_T0(OT_WORD, R_ESP);
        }
        gen_pop_T0(s);
        if (CODE64(s)) {
            ot = dflag ? OT_QUAD : OT_WORD;
        } else {
            ot = dflag + OT_WORD;
        }
        gen_op_mov_reg_T0(ot, R_EBP);
        gen_pop_update(s);
        break;
    case 0x06: /* push es */
    case 0x0e: /* push cs */
    case 0x16: /* push ss */
    case 0x1e: /* push ds */
        if (CODE64(s))
            goto illegal_op;
        gen_op_movl_T0_seg(b >> 3);
        gen_push_T0(s);
        break;
    case 0x1a0: /* push fs */
    case 0x1a8: /* push gs */
        gen_op_movl_T0_seg((b >> 3) & 7);
        gen_push_T0(s);
        break;
    case 0x07: /* pop es */
    case 0x17: /* pop ss */
    case 0x1f: /* pop ds */
        if (CODE64(s))
            goto illegal_op;
        reg = b >> 3;
        gen_pop_T0(s);
        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
        gen_pop_update(s);
        if (reg == R_SS) {
            /* if reg == SS, inhibit interrupts/trace. */
            /* If several instructions disable interrupts, only the
               _first_ does it */
            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
                tcg_gen_helper_0_0(helper_set_inhibit_irq);
            s->tf = 0;
        }
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
    case 0x1a1: /* pop fs */
    case 0x1a9: /* pop gs */
        gen_pop_T0(s);
        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
        gen_pop_update(s);
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }