Statistics
| Branch: | Revision:

root / target-i386 / fpu_helper.c @ 76f13133

History | View | Annotate | Download (29.8 kB)

1
/*
2
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19

    
20
#include <math.h>
21
#include "cpu.h"
22
#include "helper.h"
23

    
24
#if !defined(CONFIG_USER_ONLY)
25
#include "exec/softmmu_exec.h"
26
#endif /* !defined(CONFIG_USER_ONLY) */
27

    
28
#define FPU_RC_MASK         0xc00
29
#define FPU_RC_NEAR         0x000
30
#define FPU_RC_DOWN         0x400
31
#define FPU_RC_UP           0x800
32
#define FPU_RC_CHOP         0xc00
33

    
34
#define MAXTAN 9223372036854775808.0
35

    
36
/* the following deal with x86 long double-precision numbers */
37
#define MAXEXPD 0x7fff
38
#define EXPBIAS 16383
39
#define EXPD(fp)        (fp.l.upper & 0x7fff)
40
#define SIGND(fp)       ((fp.l.upper) & 0x8000)
41
#define MANTD(fp)       (fp.l.lower)
42
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
43

    
44
#define FPUS_IE (1 << 0)
45
#define FPUS_DE (1 << 1)
46
#define FPUS_ZE (1 << 2)
47
#define FPUS_OE (1 << 3)
48
#define FPUS_UE (1 << 4)
49
#define FPUS_PE (1 << 5)
50
#define FPUS_SF (1 << 6)
51
#define FPUS_SE (1 << 7)
52
#define FPUS_B  (1 << 15)
53

    
54
#define FPUC_EM 0x3f
55

    
56
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
57
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
58
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59

    
60
/* Move the x87 top-of-stack index down one slot (mod 8) and mark it valid. */
static inline void fpush(CPUX86State *env)
{
    const int top = (env->fpstt - 1) & 7;

    env->fpstt = top;
    env->fptags[top] = 0; /* tag 0: entry is in use */
}
65

    
66
/* Mark the current top-of-stack slot empty, then advance the index (mod 8). */
static inline void fpop(CPUX86State *env)
{
    /* tag 1: entry is free */
    env->fptags[env->fpstt] = 1;
    env->fpstt = (1 + env->fpstt) & 7;
}
71

    
72
/*
 * Read a 10-byte extended-precision value from guest memory at ptr:
 * a 64-bit word at ptr followed by a 16-bit word at ptr + 8.
 */
static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr)
{
    CPU_LDoubleU raw;

    raw.l.lower = cpu_ldq_data(env, ptr);
    raw.l.upper = cpu_lduw_data(env, ptr + 8);

    return raw.d;
}
80

    
81
/*
 * Write f to guest memory at ptr as 10 bytes: the 64-bit lower word at
 * ptr, then the 16-bit upper word at ptr + 8.
 */
static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr)
{
    CPU_LDoubleU raw;

    raw.d = f;

    cpu_stq_data(env, ptr, raw.l.lower);
    cpu_stw_data(env, ptr + 8, raw.l.upper);
}
89

    
90
/* x87 FPU helpers */
91

    
92
/*
 * Convert a floatx80 to a host double: narrow to float64 with softfloat
 * (using env->fp_status), then reinterpret the bits through a union.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } pun;

    pun.f64 = floatx80_to_float64(a, &env->fp_status);

    return pun.d;
}
102

    
103
/*
 * Convert a host double to floatx80: reinterpret the bits as float64
 * through a union, then widen with softfloat (using env->fp_status).
 */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } pun;

    pun.d = a;

    return float64_to_floatx80(pun.f64, &env->fp_status);
}
113

    
114
/*
 * Record the exception bits in mask in the status word.  If any status
 * exception bit is not masked by the control word, also set the summary
 * (SE) and busy (B) bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    int unmasked;

    env->fpus |= mask;

    unmasked = env->fpus & (~env->fpuc & FPUC_EM);
    if (unmasked) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}
121

    
122
/*
 * Return a / b.  A zero divisor additionally records the zero-divide
 * exception (FPUS_ZE) in the status word before the division is done.
 */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    const int zero_divisor = floatx80_is_zero(b);

    if (zero_divisor) {
        fpu_set_exception(env, FPUS_ZE);
    }

    return floatx80_div(a, b, &env->fp_status);
}
129

    
130
/*
 * Deliver a pending FPU error: raise EXCP10_COPR when CR0.NE is set,
 * otherwise (system emulation only) signal it through cpu_set_ferr().
 */
static void fpu_raise_exception(CPUX86State *env)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception(env, EXCP10_COPR);
        return;
    }
#if !defined(CONFIG_USER_ONLY)
    cpu_set_ferr(env);
#endif
}
141

    
142
/* FT0 <- the float32 whose bit pattern is val, widened to floatx80. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    union {
        float32 f;
        uint32_t i;
    } bits;

    bits.i = val;

    FT0 = float32_to_floatx80(bits.f, &env->fp_status);
}
152

    
153
/* FT0 <- the float64 whose bit pattern is val, widened to floatx80. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    union {
        float64 f;
        uint64_t i;
    } bits;

    bits.i = val;

    FT0 = float64_to_floatx80(bits.f, &env->fp_status);
}
163

    
164
/* FT0 <- the signed 32-bit integer val converted to floatx80. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
168

    
169
/*
 * Push the float32 whose bit pattern is val.  The value is written into
 * the new top slot before the stack index and tag are updated.
 */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    const int top = (env->fpstt - 1) & 7;
    union {
        float32 f;
        uint32_t i;
    } bits;

    bits.i = val;
    env->fpregs[top].d = float32_to_floatx80(bits.f, &env->fp_status);

    env->fpstt = top;
    env->fptags[top] = 0; /* slot now holds a valid value */
}
183

    
184
/*
 * Push the float64 whose bit pattern is val.  The value is written into
 * the new top slot before the stack index and tag are updated.
 */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    const int top = (env->fpstt - 1) & 7;
    union {
        float64 f;
        uint64_t i;
    } bits;

    bits.i = val;
    env->fpregs[top].d = float64_to_floatx80(bits.f, &env->fp_status);

    env->fpstt = top;
    env->fptags[top] = 0; /* slot now holds a valid value */
}
198

    
199
/* Push the signed 32-bit integer val, converted to floatx80. */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    const int top = (env->fpstt - 1) & 7;

    env->fpregs[top].d = int32_to_floatx80(val, &env->fp_status);

    env->fpstt = top;
    env->fptags[top] = 0; /* slot now holds a valid value */
}
208

    
209
/* Push the signed 64-bit integer val, converted to floatx80. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    const int top = (env->fpstt - 1) & 7;

    env->fpregs[top].d = int64_to_floatx80(val, &env->fp_status);

    env->fpstt = top;
    env->fptags[top] = 0; /* slot now holds a valid value */
}
218

    
219
/* Return the bit pattern of ST0 narrowed to float32. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    union {
        float32 f;
        uint32_t i;
    } bits;

    bits.f = floatx80_to_float32(ST0, &env->fp_status);

    return bits.i;
}
229

    
230
/* Return the bit pattern of ST0 narrowed to float64. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    union {
        float64 f;
        uint64_t i;
    } bits;

    bits.f = floatx80_to_float64(ST0, &env->fp_status);

    return bits.i;
}
240

    
241
/*
 * Convert ST0 to an integer for a 16-bit store, using the current
 * rounding mode.  Results that do not fit in int16_t become -32768.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    const int32_t wide = floatx80_to_int32(ST0, &env->fp_status);

    if (wide == (int16_t)wide) {
        return wide;
    }
    return -32768;
}
251

    
252
/* Convert ST0 to a signed 32-bit integer using the current rounding mode. */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    return floatx80_to_int32(ST0, &env->fp_status);
}
259

    
260
/* Convert ST0 to a signed 64-bit integer using the current rounding mode. */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    return floatx80_to_int64(ST0, &env->fp_status);
}
267

    
268
/*
 * Convert ST0 to an integer for a 16-bit store, truncating toward zero.
 * Results that do not fit in int16_t become -32768.
 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    const int32_t wide = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);

    if (wide == (int16_t)wide) {
        return wide;
    }
    return -32768;
}
278

    
279
/* Convert ST0 to a signed 32-bit integer, truncating toward zero. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
}
286

    
287
/* Convert ST0 to a signed 64-bit integer, truncating toward zero. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
}
294

    
295
/*
 * Push the 80-bit value stored at guest address ptr.  The memory read
 * happens before the stack index and tag are updated.
 */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    const int top = (env->fpstt - 1) & 7;

    env->fpregs[top].d = helper_fldt(env, ptr);

    env->fpstt = top;
    env->fptags[top] = 0; /* slot now holds a valid value */
}
304

    
305
/* Store ST0 as an 80-bit value at guest address ptr. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr);
}
309

    
310
/* Externally visible wrapper around fpush(). */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
314

    
315
/* Externally visible wrapper around fpop(). */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
319

    
320
/*
 * Decrement the top-of-stack index (mod 8) without touching the tags,
 * and clear the condition bits selected by mask 0x4700 in the status word.
 */
void helper_fdecstp(CPUX86State *env)
{
    env->fpus &= ~0x4700;
    env->fpstt = (env->fpstt - 1) & 7;
}
325

    
326
/*
 * Increment the top-of-stack index (mod 8) without touching the tags,
 * and clear the condition bits selected by mask 0x4700 in the status word.
 */
void helper_fincstp(CPUX86State *env)
{
    env->fpus &= ~0x4700;
    env->fpstt = (env->fpstt + 1) & 7;
}
331

    
332
/* FPU move */
333

    
334
/* Tag ST(st_index) as empty; the register contents are left alone. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    const int slot = (env->fpstt + st_index) & 7;

    env->fptags[slot] = 1;
}
338

    
339
/* ST0 <- FT0 */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
343

    
344
/* FT0 <- ST(st_index) */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
348

    
349
/* ST0 <- ST(st_index) */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
353

    
354
/* ST(st_index) <- ST0 */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
358

    
359
/* Swap the contents of ST0 and ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 saved = ST(st_index);

    ST(st_index) = ST0;
    ST0 = saved;
}
367

    
368
/* FPU operations */
369

    
370
/*
 * Status-word condition bits for FCOM/FUCOM, indexed by the softfloat
 * comparison result plus one (callers use fcom_ccval[ret + 1]).
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
371

    
372
/*
 * Compare ST0 with FT0 (signalling compare) and replace the status-word
 * bits under mask 0x4500 with the matching fcom_ccval entry.
 */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    const int rel = floatx80_compare(ST0, FT0, &env->fp_status);
    const int cleared = env->fpus & ~0x4500;

    env->fpus = cleared | fcom_ccval[rel + 1];
}
379

    
380
/*
 * Compare ST0 with FT0 (quiet compare) and replace the status-word bits
 * under mask 0x4500 with the matching fcom_ccval entry.
 */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    const int rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    const int cleared = env->fpus & ~0x4500;

    env->fpus = cleared | fcom_ccval[rel + 1];
}
387

    
388
/*
 * EFLAGS bits for FCOMI/FUCOMI, indexed by the softfloat comparison
 * result plus one (callers use fcomi_ccval[ret + 1]).
 */
static const int fcomi_ccval[4] = {
    CC_C,
    CC_Z,
    0,
    CC_Z | CC_P | CC_C,
};
389

    
390
/*
 * Compare ST0 with FT0 (signalling compare) and store the outcome in the
 * Z/P/C bits of the computed flags, which are written back to CC_SRC.
 */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    const int rel = floatx80_compare(ST0, FT0, &env->fp_status);
    int flags = cpu_cc_compute_all(env, CC_OP);

    flags &= ~(CC_Z | CC_P | CC_C);
    flags |= fcomi_ccval[rel + 1];
    CC_SRC = flags;
}
400

    
401
/*
 * Compare ST0 with FT0 (quiet compare) and store the outcome in the
 * Z/P/C bits of the computed flags, which are written back to CC_SRC.
 */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    const int rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    int flags = cpu_cc_compute_all(env, CC_OP);

    flags &= ~(CC_Z | CC_P | CC_C);
    flags |= fcomi_ccval[rel + 1];
    CC_SRC = flags;
}
411

    
412
/* ST0 <- ST0 + FT0 */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
}
416

    
417
/* ST0 <- ST0 * FT0 */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
}
421

    
422
/* ST0 <- ST0 - FT0 */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
}
426

    
427
/* Reverse subtract: ST0 <- FT0 - ST0 */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
}
431

    
432
/* ST0 <- ST0 / FT0, with zero-divide detection done by helper_fdiv(). */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
436

    
437
/* Reverse divide: ST0 <- FT0 / ST0, via helper_fdiv(). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
441

    
442
/* fp operations between STN and ST0 */
443

    
444
/* ST(st_index) <- ST(st_index) + ST0 */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
}
448

    
449
/* ST(st_index) <- ST(st_index) * ST0 */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
}
453

    
454
/* ST(st_index) <- ST(st_index) - ST0 */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
}
458

    
459
/* Reverse subtract: ST(st_index) <- ST0 - ST(st_index) */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
}
463

    
464
/* ST(st_index) <- ST(st_index) / ST0, via helper_fdiv(). */
void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *dst = &ST(st_index);
    floatx80 quotient = helper_fdiv(env, *dst, ST0);

    *dst = quotient;
}
471

    
472
/* Reverse divide: ST(st_index) <- ST0 / ST(st_index), via helper_fdiv(). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *dst = &ST(st_index);
    floatx80 quotient = helper_fdiv(env, ST0, *dst);

    *dst = quotient;
}
479

    
480
/* misc FPU operations */
481
/* Flip the sign of ST0. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
485

    
486
/* Replace ST0 with its absolute value. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
490

    
491
/* ST0 <- 1.0 (the caller is expected to have pushed already). */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
495

    
496
/* ST0 <- the floatx80_l2t constant defined at the top of this file. */
void helper_fldl2t_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2t;
}
500

    
501
/* ST0 <- the floatx80_l2e constant defined at the top of this file. */
void helper_fldl2e_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2e;
}
505

    
506
/* ST0 <- the floatx80_pi constant. */
void helper_fldpi_ST0(CPUX86State *env)
{
    ST0 = floatx80_pi;
}
510

    
511
/* ST0 <- the floatx80_lg2 constant defined at the top of this file. */
void helper_fldlg2_ST0(CPUX86State *env)
{
    ST0 = floatx80_lg2;
}
515

    
516
/* ST0 <- the floatx80_ln2 constant. */
void helper_fldln2_ST0(CPUX86State *env)
{
    ST0 = floatx80_ln2;
}
520

    
521
/* ST0 <- +0.0 */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
525

    
526
/* FT0 <- +0.0 */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
530

    
531
/*
 * Build the architectural status word: env->fpus with bits 11..13
 * replaced by the current top-of-stack index.
 */
uint32_t helper_fnstsw(CPUX86State *env)
{
    const uint32_t top_field = (env->fpstt & 0x7) << 11;

    return (env->fpus & ~0x3800) | top_field;
}
535

    
536
/* Return the FPU control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
540

    
541
/*
 * Propagate the control word (env->fpuc) into the softfloat status:
 * the RC field selects the rounding mode and bits 8..9 select the
 * floatx80 rounding precision.
 */
static void update_fp_status(CPUX86State *env)
{
    int rounding;
    int precision;

    /* rounding control */
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
        rounding = float_round_down;
        break;
    case FPU_RC_UP:
        rounding = float_round_up;
        break;
    case FPU_RC_CHOP:
        rounding = float_round_to_zero;
        break;
    case FPU_RC_NEAR:
    default:
        rounding = float_round_nearest_even;
        break;
    }
    set_float_rounding_mode(rounding, &env->fp_status);

    /* precision control: 0 -> 32, 2 -> 64, anything else -> 80 */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        precision = 32;
        break;
    case 2:
        precision = 64;
        break;
    default:
        precision = 80;
        break;
    }
    set_floatx80_rounding_precision(precision, &env->fp_status);
}
576

    
577
/* Load a new control word and resynchronise the softfloat status with it. */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    env->fpuc = val;

    update_fp_status(env);
}
582

    
583
/* Clear the status word, keeping only the bits under mask 0x7f00. */
void helper_fclex(CPUX86State *env)
{
    env->fpus = env->fpus & 0x7f00;
}
587

    
588
/* Deliver a pending FPU exception if the summary bit (SE) is set. */
void helper_fwait(CPUX86State *env)
{
    if (!(env->fpus & FPUS_SE)) {
        return;
    }
    fpu_raise_exception(env);
}
594

    
595
/*
 * Reset the x87 state: clear the status word and top-of-stack index,
 * load the default control word 0x37f and tag all eight registers empty.
 *
 * The softfloat status is refreshed after the control word changes;
 * otherwise the rounding mode and precision from the previous control
 * word would remain in effect.
 */
void helper_fninit(CPUX86State *env)
{
    int i;

    env->fpus = 0;
    env->fpstt = 0;
    env->fpuc = 0x37f;
    update_fp_status(env);

    for (i = 0; i < 8; i++) {
        env->fptags[i] = 1; /* empty */
    }
}
609

    
610
/* BCD ops */
611

    
612
/*
 * Load a 10-byte packed BCD value from guest memory at ptr and push it.
 *
 * Bytes 0..8 each hold two decimal digits, least significant byte first;
 * bit 7 of byte 9 is the sign.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    /* Accumulate from the most significant digit pair downwards. */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data(env, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data(env, ptr + 9) & 0x80) {
        /* floatx80_chs() returns the negated value; it must be assigned. */
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
631

    
632
/*
 * Store ST0 as 10-byte packed BCD at guest address ptr.
 *
 * ST0 is converted to a 64-bit integer first.  The sign byte (offset 9)
 * is written before the digits; bytes 0..8 then receive two decimal
 * digits each, least significant first, and are zero-filled once the
 * value is exhausted.
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    const target_ulong sign_addr = ptr + 9;
    target_ulong cursor = ptr;
    int64_t magnitude = floatx80_to_int64(ST0, &env->fp_status);
    int pair;

    if (magnitude < 0) {
        cpu_stb_data(env, sign_addr, 0x80);
        magnitude = -magnitude;
    } else {
        cpu_stb_data(env, sign_addr, 0x00);
    }

    /* Emit digit pairs until the value runs out. */
    while (cursor < sign_addr && magnitude != 0) {
        pair = magnitude % 100;
        magnitude = magnitude / 100;
        pair = ((pair / 10) << 4) | (pair % 10);
        cpu_stb_data(env, cursor++, pair);
    }

    /* Pad the remaining digit bytes with zero. */
    for (; cursor < sign_addr; cursor++) {
        cpu_stb_data(env, cursor, 0);
    }
}
660

    
661
/* ST0 <- 2^ST0 - 1, computed in host double precision. */
void helper_f2xm1(CPUX86State *env)
{
    double x = floatx80_to_double(env, ST0);
    double r;

    r = pow(2.0, x) - 1.0;
    ST0 = double_to_floatx80(env, r);
}
668

    
669
/*
 * ST1 <- ST1 * log2(ST0), then pop; computed in host double precision.
 * If ST0 is not greater than zero, nothing is computed: the bits under
 * mask 0x4700 are cleared and bit 0x400 is set in the status word.
 */
void helper_fyl2x(CPUX86State *env)
{
    double acc = floatx80_to_double(env, ST0);

    if (!(acc > 0.0)) {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
        return;
    }

    acc = log(acc) / log(2.0); /* log2(ST0) */
    acc *= floatx80_to_double(env, ST1);
    ST1 = double_to_floatx80(env, acc);
    fpop(env);
}
683

    
684
/*
 * ST0 <- tan(ST0), then push 1.0; computed in host double precision.
 * Arguments with magnitude above MAXTAN are left alone and only set
 * bit 0x400 (C2) in the status word.
 */
void helper_fptan(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if ((arg > MAXTAN) || (arg < -MAXTAN)) {
        env->fpus |= 0x400;
        return;
    }

    arg = tan(arg);
    ST0 = double_to_floatx80(env, arg);
    fpush(env);
    ST0 = floatx80_one;
    env->fpus &= ~0x400; /* C2 <-- 0 */
    /* the above code is for |arg| < 2**52 only */
}
699

    
700
/* ST1 <- atan2(ST1, ST0), then pop; computed in host double precision. */
void helper_fpatan(CPUX86State *env)
{
    const double y = floatx80_to_double(env, ST1);
    const double x = floatx80_to_double(env, ST0);

    ST1 = double_to_floatx80(env, atan2(y, x));
    fpop(env);
}
709

    
710
/*
 * Split ST0 into exponent and significand: the old ST0 slot receives the
 * unbiased exponent and a new top entry receives the significand (the
 * original value with its exponent field reset to the bias).
 *
 * For a zero input the exponent slot receives -1/0 computed through
 * softfloat, and the pushed value is the original zero.
 */
void helper_fxtract(CPUX86State *env)
{
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (!floatx80_is_zero(ST0)) {
        int expdif = EXPD(temp) - EXPBIAS;

        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
        return;
    }

    /* Easy way to generate -inf and raising division by 0 exception */
    ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                       &env->fp_status);
    fpush(env);
    ST0 = temp.d;
}
733

    
734
/*
 * Partial remainder of ST0 by ST1 with the quotient rounded by rint(),
 * computed in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 cleared.
 * - Exponent of ST0 below that of ST1: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three quotient
 *   bits go to C0, C3 and C1.
 * - Otherwise: one partial reduction step, C2 set.
 */
void helper_fprem1(CPUX86State *env)
{
    double dividend, divisor, result;
    CPU_LDoubleU raw_dividend, raw_divisor;
    int expdif;

    dividend = floatx80_to_double(env, ST0);
    divisor = floatx80_to_double(env, ST1);

    if (isinf(dividend) || isnan(dividend) || isnan(divisor)
        || (divisor == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    raw_dividend.d = ST0;
    raw_divisor.d = ST1;
    expdif = EXPD(raw_dividend) - EXPD(raw_divisor);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif >= 53) {
        double scale, chopped;

        env->fpus |= 0x400;  /* C2 <-- 1 */
        scale = pow(2.0, expdif - 50);
        chopped = (dividend / divisor) / scale;
        /* chopped = integer obtained by chopping */
        chopped = (chopped < 0.0) ?
                  -(floor(fabs(chopped))) : floor(chopped);
        result = dividend;
        result -= (divisor * chopped * scale);
    } else {
        double quot;
        signed long long int q;

        quot = dividend / divisor;
        /* round the quotient towards nearest integer */
        quot = rint(quot);
        result = dividend - divisor * quot;

        /* magnitude of the quotient as an integer */
        if (quot < 0.0) {
            q = (signed long long int)(-quot);
        } else {
            q = (signed long long int)quot;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    }
    ST0 = double_to_floatx80(env, result);
}
792

    
793
/*
 * Partial remainder of ST0 by ST1 with the quotient truncated toward
 * zero, computed in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 cleared.
 * - Exponent of ST0 below that of ST1: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three quotient
 *   bits go to C0, C3 and C1.
 * - Otherwise: one partial reduction step, C2 set.
 */
void helper_fprem(CPUX86State *env)
{
    double dividend, divisor, result;
    CPU_LDoubleU raw_dividend, raw_divisor;
    int expdif;

    dividend = floatx80_to_double(env, ST0);
    divisor = floatx80_to_double(env, ST1);

    if (isinf(dividend) || isnan(dividend) || isnan(divisor)
        || (divisor == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    raw_dividend.d = ST0;
    raw_divisor.d = ST1;
    expdif = EXPD(raw_dividend) - EXPD(raw_divisor);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif >= 53) {
        int N = 32 + (expdif % 32); /* as per AMD docs */
        double scale, chopped;

        env->fpus |= 0x400;  /* C2 <-- 1 */
        scale = pow(2.0, (double)(expdif - N));
        chopped = (dividend / divisor) / scale;
        /* chopped = integer obtained by chopping */
        chopped = (chopped < 0.0) ?
                  -(floor(fabs(chopped))) : floor(chopped);
        result = dividend;
        result -= (divisor * chopped * scale);
    } else {
        double quot;
        signed long long int q;

        quot = dividend / divisor; /* ST0 / ST1 */
        /* round the quotient towards zero */
        quot = (quot < 0.0) ? ceil(quot) : floor(quot);
        result = dividend - divisor * quot;

        /* magnitude of the quotient as an integer */
        if (quot < 0.0) {
            q = (signed long long int)(-quot);
        } else {
            q = (signed long long int)quot;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    }
    ST0 = double_to_floatx80(env, result);
}
853

    
854
/*
 * ST1 <- ST1 * log2(ST0 + 1.0), then pop; computed in host double
 * precision.  If ST0 + 1.0 is not greater than zero, nothing is computed:
 * the bits under mask 0x4700 are cleared and bit 0x400 is set.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    double acc = floatx80_to_double(env, ST0);

    if (!((acc + 1.0) > 0.0)) {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
        return;
    }

    acc = log(acc + 1.0) / log(2.0); /* log2(ST0 + 1.0) */
    acc *= floatx80_to_double(env, ST1);
    ST1 = double_to_floatx80(env, acc);
    fpop(env);
}
868

    
869
/*
 * ST0 <- sqrt(ST0).  For a negative input the condition bits are first
 * cleared and bit 0x400 set; the softfloat square root is taken
 * regardless.
 */
void helper_fsqrt(CPUX86State *env)
{
    const int negative = floatx80_is_neg(ST0);

    if (negative) {
        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }

    ST0 = floatx80_sqrt(ST0, &env->fp_status);
}
877

    
878
/*
 * Replace ST0 with sin(ST0) and push cos of the original value, both
 * computed in host double precision.  Arguments with magnitude above
 * MAXTAN are left alone and only set bit 0x400 (C2).
 */
void helper_fsincos(CPUX86State *env)
{
    const double arg = floatx80_to_double(env, ST0);

    if ((arg > MAXTAN) || (arg < -MAXTAN)) {
        env->fpus |= 0x400;
        return;
    }

    ST0 = double_to_floatx80(env, sin(arg));
    fpush(env);
    ST0 = double_to_floatx80(env, cos(arg));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
    /* the above code is for |arg| < 2**63 only */
}
892

    
893
/* Round ST0 to an integral value using the current softfloat status. */
void helper_frndint(CPUX86State *env)
{
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
}
897

    
898
/*
 * Scale ST0 by 2^n, where n is ST1 truncated toward zero to a 32-bit
 * integer.  If ST1 is any kind of NaN, ST0 simply becomes ST1.
 */
void helper_fscale(CPUX86State *env)
{
    int n;

    if (floatx80_is_any_nan(ST1)) {
        ST0 = ST1;
        return;
    }

    n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
    ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
}
907

    
908
/*
 * ST0 <- sin(ST0), computed in host double precision.  Arguments with
 * magnitude above MAXTAN are left alone and only set bit 0x400 (C2).
 */
void helper_fsin(CPUX86State *env)
{
    const double arg = floatx80_to_double(env, ST0);

    if ((arg > MAXTAN) || (arg < -MAXTAN)) {
        env->fpus |= 0x400;
        return;
    }

    ST0 = double_to_floatx80(env, sin(arg));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
    /* the above code is for |arg| < 2**53 only */
}
920

    
921
/*
 * ST0 <- cos(ST0), computed in host double precision.  Arguments with
 * magnitude above MAXTAN are left alone and only set bit 0x400 (C2).
 */
void helper_fcos(CPUX86State *env)
{
    const double arg = floatx80_to_double(env, ST0);

    if ((arg > MAXTAN) || (arg < -MAXTAN)) {
        env->fpus |= 0x400;
        return;
    }

    ST0 = double_to_floatx80(env, cos(arg));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
    /* the above code is for |arg| < 2**63 only */
}
933

    
934
/*
 * Classify ST0 into the condition bits of the status word.
 *
 * C1 (0x200) mirrors the sign bit.  The class code is chosen from the
 * exponent and mantissa: infinity 0x500, NaN 0x100, zero 0x4000,
 * denormal 0x4400, anything else 0x400.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int exponent;
    int class_bits;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* XXX: test fptags too */
    exponent = EXPD(temp);
    if (exponent == MAXEXPD) {
        /* Infinity or NaN */
        class_bits = (MANTD(temp) == 0x8000000000000000ULL) ? 0x500 : 0x100;
    } else if (exponent == 0) {
        /* Zero or Denormal */
        class_bits = (MANTD(temp) == 0) ? 0x4000 : 0x4400;
    } else {
        class_bits = 0x400;
    }
    env->fpus |= class_bits;
}
964

    
965
/*
 * Store the FPU environment at guest address ptr.
 *
 * data32 selects the layout: non-zero writes seven 32-bit fields (28
 * bytes), zero writes seven 16-bit fields (14 bytes).  In both layouts
 * the fields are control word, status word (with the top-of-stack index
 * folded into bits 11..13), tag word, then four fields written as zero.
 *
 * The full two-bit-per-register tag word is rebuilt from the per-register
 * empty flags and the register contents: 3 = empty, 1 = zero,
 * 2 = NaN/infinity/denormal, 0 = otherwise.
 */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       /* unsigned constant: 1LL << 63 overflows a signed
                          long long, which is undefined behaviour */
                       || (mant & (1ULL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data(env, ptr, env->fpuc);
        cpu_stl_data(env, ptr + 4, fpus);
        cpu_stl_data(env, ptr + 8, fptag);
        cpu_stl_data(env, ptr + 12, 0); /* fpip */
        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
        cpu_stl_data(env, ptr + 24, 0); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data(env, ptr, env->fpuc);
        cpu_stw_data(env, ptr + 2, fpus);
        cpu_stw_data(env, ptr + 4, fptag);
        cpu_stw_data(env, ptr + 6, 0);
        cpu_stw_data(env, ptr + 8, 0);
        cpu_stw_data(env, ptr + 10, 0);
        cpu_stw_data(env, ptr + 12, 0);
    }
}
1011

    
1012
/*
 * Load the FPU environment from guest address ptr.
 *
 * data32 selects the layout: non-zero means the control, status and tag
 * words sit at offsets 0/4/8, zero means offsets 0/2/4.  Only the low 16
 * bits of each field are read.
 *
 * The softfloat status is refreshed right after the control word is
 * loaded; otherwise the rounding mode and precision from the previous
 * control word would remain in effect.
 */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    int i, fpus, fptag;

    if (data32) {
        env->fpuc = cpu_lduw_data(env, ptr);
        update_fp_status(env);
        fpus = cpu_lduw_data(env, ptr + 4);
        fptag = cpu_lduw_data(env, ptr + 8);
    } else {
        env->fpuc = cpu_lduw_data(env, ptr);
        update_fp_status(env);
        fpus = cpu_lduw_data(env, ptr + 2);
        fptag = cpu_lduw_data(env, ptr + 4);
    }
    /* The top-of-stack index lives in bits 11..13 of the status word. */
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    /* Only the "empty" encoding (3) of each two-bit tag is retained. */
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}
1032

    
1033
/*
 * Save the full x87 state at guest address ptr and reinitialise the FPU.
 *
 * The environment is written by helper_fstenv(); the eight stack
 * registers ST(0)..ST(7) follow as 10-byte values, starting 14 bytes in
 * for the 16-bit layout or 28 bytes in for the 32-bit layout.
 *
 * The trailing reset refreshes the softfloat status after restoring the
 * default control word; otherwise the rounding mode and precision from
 * the previous control word would remain in effect.
 */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    int i;

    helper_fstenv(env, ptr, data32);

    ptr += (14 << data32);
    for (i = 0; i < 8; i++) {
        helper_fstt(env, ST(i), ptr);
        ptr += 10;
    }

    /* fninit */
    env->fpus = 0;
    env->fpstt = 0;
    env->fpuc = 0x37f;
    update_fp_status(env);
    for (i = 0; i < 8; i++) {
        env->fptags[i] = 1;
    }
}
1060

    
1061
/*
 * Restore the full x87 state from guest address ptr: the environment via
 * helper_fldenv(), then ST(0)..ST(7) as consecutive 10-byte values that
 * start 14 or 28 bytes in, depending on data32.
 */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    target_ulong reg_addr;
    int i;

    helper_fldenv(env, ptr, data32);

    reg_addr = ptr + (14 << data32);
    for (i = 0; i < 8; i++) {
        floatx80 value = helper_fldt(env, reg_addr);

        ST(i) = value;
        reg_addr += 10;
    }
}
1075

    
1076
#if defined(CONFIG_USER_ONLY)
1077
/* User-mode-only entry point that forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}
1081

    
1082
/* User-mode-only entry point that forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
1086
#endif
1087

    
1088
/*
 * Store the FXSAVE image at guest address ptr.
 *
 * ptr must be 16-byte aligned, otherwise a general protection fault is
 * raised.  The image holds the control word, status word and an
 * abridged one-bit-per-register tag byte, zeroed instruction/data
 * pointer fields (64-bit form when data64 is set on x86-64 targets),
 * and ST(0)..ST(7) in 16-byte slots from offset 0x20.
 *
 * When CR4.OSFXSR is set, MXCSR and its mask are stored as well, followed
 * by the XMM registers from offset 0xa0 unless the fast-FXSAVE
 * conditions hold (EFER.FFXSR set, CPL 0, long mode active).
 */
void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
{
    int fpus, fptag, i, nb_xmm_regs;
    int skip_xmm;
    target_ulong addr;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception(env, EXCP0D_GPF);
    }

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;

    /* One bit per register: collect the "empty" flags... */
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    cpu_stw_data(env, ptr, env->fpuc);
    cpu_stw_data(env, ptr + 2, fpus);
    /* ...and invert the low byte when storing. */
    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
#ifdef TARGET_X86_64
    if (data64) {
        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
    } else
#endif
    {
        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
    }

    for (i = 0, addr = ptr + 0x20; i < 8; i++, addr += 16) {
        helper_fstt(env, ST(i), addr);
    }

    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }

    /* XXX: finish it */
    cpu_stl_data(env, ptr + 0x18, env->mxcsr); /* mxcsr */
    cpu_stl_data(env, ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */

    nb_xmm_regs = (env->hflags & HF_CS64_MASK) ? 16 : 8;

    /* Fast FXSAVE leaves out the XMM registers */
    skip_xmm = (env->efer & MSR_EFER_FFXSR)
               && !(env->hflags & HF_CPL_MASK)
               && (env->hflags & HF_LMA_MASK);
    if (skip_xmm) {
        return;
    }

    addr = ptr + 0xa0;
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data(env, addr, env->xmm_regs[i].XMM_Q(0));
        cpu_stq_data(env, addr + 8, env->xmm_regs[i].XMM_Q(1));
        addr += 16;
    }
}
1149

    
1150
/*
 * FXRSTOR: reload the x87 state, and the SSE state when CR4_OSFXSR_MASK is
 * set in CR4, from the guest memory area at @ptr.
 *
 * Offsets read (relative to @ptr): 0x00 control word, 0x02 status word,
 * 0x04 inverted tag bitmap, 0x18 MXCSR, 0x20 ST(0)..ST(7) in 16-byte
 * slots, 0xa0 XMM registers in 16-byte slots.
 *
 * Raises EXCP0D_GPF when @ptr is not 16-byte aligned.  @data64 is not
 * consulted by this function.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
{
    int i, fpus, fptag, nb_xmm_regs;
    floatx80 tmp;
    target_ulong addr;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception(env, EXCP0D_GPF);
    }

    /*
     * NOTE(review): fpuc is assigned directly; whether state derived from
     * the control word needs refreshing here is not visible from this
     * function -- confirm.
     */
    env->fpuc = cpu_lduw_data(env, ptr);
    fpus = cpu_lduw_data(env, ptr + 2);
    fptag = cpu_lduw_data(env, ptr + 4);

    /* Bits 11..13 of the status word carry the top-of-stack index. */
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;

    /* The saved bitmap is the inverse of the per-register fptags flags. */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, addr);
        ST(i) = tmp;
        addr += 16;
    }

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        /* XXX: finish it */
        /*
         * Install MXCSR through helper_ldmxcsr() rather than a bare store
         * so that the softfloat status derived from it (rounding mode,
         * DAZ, FZ) is recomputed, exactly as for an LDMXCSR instruction.
         */
        helper_ldmxcsr(env, cpu_ldl_data(env, ptr + 0x18));
        /* The MXCSR mask at ptr + 0x1c is not read. */
        if (env->hflags & HF_CS64_MASK) {
            nb_xmm_regs = 16;
        } else {
            nb_xmm_regs = 8;
        }
        addr = ptr + 0xa0;
        /* Fast FXRESTORE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            for (i = 0; i < nb_xmm_regs; i++) {
                env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data(env, addr);
                env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data(env, addr + 8);
                addr += 16;
            }
        }
    }
}
1200

    
1201
/*
 * Split the 80-bit value @f into its two raw fields: the 64-bit lower
 * part is stored to *@pmant and the 16-bit upper part to *@pexp.
 */
void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
    CPU_LDoubleU conv;

    /* View the value through the union to reach its raw halves. */
    conv.d = f;
    *pexp = conv.l.upper;
    *pmant = conv.l.lower;
}
1209

    
1210
floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1211
{
1212
    CPU_LDoubleU temp;
1213

    
1214
    temp.l.upper = upper;
1215
    temp.l.lower = mant;
1216
    return temp.d;
1217
}
1218

    
1219
/* MMX/SSE */
1220
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1221

    
1222
/* MXCSR control bits consulted by update_sse_status(). */
#define SSE_DAZ             (1 << 6)    /* 0x0040: denormals are zero */
#define SSE_RC_MASK         (3 << 13)   /* 0x6000: rounding-control field */
#define SSE_RC_NEAR         (0 << 13)   /* 0x0000: round to nearest even */
#define SSE_RC_DOWN         (1 << 13)   /* 0x2000: round down */
#define SSE_RC_UP           (2 << 13)   /* 0x4000: round up */
#define SSE_RC_CHOP         (3 << 13)   /* 0x6000: round toward zero */
#define SSE_FZ              (1 << 15)   /* 0x8000: flush to zero */
1229

    
1230
/*
 * Recompute the softfloat settings in env->sse_status from the control
 * bits of env->mxcsr: rounding mode (SSE_RC_MASK field), denormals-are-zero
 * (SSE_DAZ) and flush-to-zero (SSE_FZ).
 *
 * All three are MXCSR controls, so all three are applied to sse_status.
 * Flush-to-zero in particular must not be written to env->fp_status:
 * doing so would leave SSE arithmetic unaffected by MXCSR.FZ while
 * changing the behaviour of the separate fp_status state.
 */
static void update_sse_status(CPUX86State *env)
{
    int rnd_type;

    /* set rounding mode */
    switch (env->mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}
1258

    
1259
/*
 * LDMXCSR: install @val as the new MXCSR and refresh the SSE softfloat
 * status that is derived from it.
 */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    env->mxcsr = val;

    /* Propagate the new control bits via update_sse_status(). */
    update_sse_status(env);
}
1264

    
1265
/*
 * Enter MMX mode: reset the FPU top-of-stack index to 0 and clear all
 * eight fptags entries.
 */
void helper_enter_mmx(CPUX86State *env)
{
    int slot;

    env->fpstt = 0;

    /* Assumes fptags holds eight one-byte entries; clear each in turn. */
    for (slot = 0; slot < 8; slot++) {
        env->fptags[slot] = 0;
    }
}
1271

    
1272
/* EMMS: put every one of the eight FPU tag entries in the empty state. */
void helper_emms(CPUX86State *env)
{
    int slot;

    /* Assumes fptags holds eight one-byte entries; 1 is what fninit uses. */
    for (slot = 0; slot < 8; slot++) {
        env->fptags[slot] = 1;
    }
}
1278

    
1279
/* XXX: suppress */
1280
void helper_movq(CPUX86State *env, void *d, void *s)
1281
{
1282
    *(uint64_t *)d = *(uint64_t *)s;
1283
}
1284

    
1285
#define SHIFT 0
1286
#include "ops_sse.h"
1287

    
1288
#define SHIFT 1
1289
#include "ops_sse.h"