Revision 5af45186 target-i386/ops_sse.h

b/target-i386/ops_sse.h
35 35
#define SUFFIX _xmm
36 36
#endif
37 37

  
38
void OPPROTO glue(op_psrlw, SUFFIX)(void)
38
void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
39 39
{
40
    Reg *d, *s;
41 40
    int shift;
42 41

  
43
    d = (Reg *)((char *)env + PARAM1);
44
    s = (Reg *)((char *)env + PARAM2);
45

  
46 42
    if (s->Q(0) > 15) {
47 43
        d->Q(0) = 0;
48 44
#if SHIFT == 1
......
64 60
    FORCE_RET();
65 61
}
66 62

  
67
void OPPROTO glue(op_psraw, SUFFIX)(void)
63
void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
68 64
{
69
    Reg *d, *s;
70 65
    int shift;
71 66

  
72
    d = (Reg *)((char *)env + PARAM1);
73
    s = (Reg *)((char *)env + PARAM2);
74

  
75 67
    if (s->Q(0) > 15) {
76 68
        shift = 15;
77 69
    } else {
......
89 81
#endif
90 82
}
91 83

  
92
void OPPROTO glue(op_psllw, SUFFIX)(void)
84
void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
93 85
{
94
    Reg *d, *s;
95 86
    int shift;
96 87

  
97
    d = (Reg *)((char *)env + PARAM1);
98
    s = (Reg *)((char *)env + PARAM2);
99

  
100 88
    if (s->Q(0) > 15) {
101 89
        d->Q(0) = 0;
102 90
#if SHIFT == 1
......
118 106
    FORCE_RET();
119 107
}
120 108

  
121
void OPPROTO glue(op_psrld, SUFFIX)(void)
109
void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
122 110
{
123
    Reg *d, *s;
124 111
    int shift;
125 112

  
126
    d = (Reg *)((char *)env + PARAM1);
127
    s = (Reg *)((char *)env + PARAM2);
128

  
129 113
    if (s->Q(0) > 31) {
130 114
        d->Q(0) = 0;
131 115
#if SHIFT == 1
......
143 127
    FORCE_RET();
144 128
}
145 129

  
146
void OPPROTO glue(op_psrad, SUFFIX)(void)
130
void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
147 131
{
148
    Reg *d, *s;
149 132
    int shift;
150 133

  
151
    d = (Reg *)((char *)env + PARAM1);
152
    s = (Reg *)((char *)env + PARAM2);
153

  
154 134
    if (s->Q(0) > 31) {
155 135
        shift = 31;
156 136
    } else {
......
164 144
#endif
165 145
}
166 146

  
167
void OPPROTO glue(op_pslld, SUFFIX)(void)
147
void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
168 148
{
169
    Reg *d, *s;
170 149
    int shift;
171 150

  
172
    d = (Reg *)((char *)env + PARAM1);
173
    s = (Reg *)((char *)env + PARAM2);
174

  
175 151
    if (s->Q(0) > 31) {
176 152
        d->Q(0) = 0;
177 153
#if SHIFT == 1
......
189 165
    FORCE_RET();
190 166
}
191 167

  
192
void OPPROTO glue(op_psrlq, SUFFIX)(void)
168
void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
193 169
{
194
    Reg *d, *s;
195 170
    int shift;
196 171

  
197
    d = (Reg *)((char *)env + PARAM1);
198
    s = (Reg *)((char *)env + PARAM2);
199

  
200 172
    if (s->Q(0) > 63) {
201 173
        d->Q(0) = 0;
202 174
#if SHIFT == 1
......
212 184
    FORCE_RET();
213 185
}
214 186

  
215
void OPPROTO glue(op_psllq, SUFFIX)(void)
187
void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
216 188
{
217
    Reg *d, *s;
218 189
    int shift;
219 190

  
220
    d = (Reg *)((char *)env + PARAM1);
221
    s = (Reg *)((char *)env + PARAM2);
222

  
223 191
    if (s->Q(0) > 63) {
224 192
        d->Q(0) = 0;
225 193
#if SHIFT == 1
......
236 204
}
237 205

  
238 206
#if SHIFT == 1
239
void OPPROTO glue(op_psrldq, SUFFIX)(void)
207
void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
240 208
{
241
    Reg *d, *s;
242 209
    int shift, i;
243 210

  
244
    d = (Reg *)((char *)env + PARAM1);
245
    s = (Reg *)((char *)env + PARAM2);
246 211
    shift = s->L(0);
247 212
    if (shift > 16)
248 213
        shift = 16;
......
253 218
    FORCE_RET();
254 219
}
255 220

  
256
void OPPROTO glue(op_pslldq, SUFFIX)(void)
221
void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
257 222
{
258
    Reg *d, *s;
259 223
    int shift, i;
260 224

  
261
    d = (Reg *)((char *)env + PARAM1);
262
    s = (Reg *)((char *)env + PARAM2);
263 225
    shift = s->L(0);
264 226
    if (shift > 16)
265 227
        shift = 16;
......
271 233
}
272 234
#endif
273 235

  
274
#define SSE_OP_B(name, F)\
275
void OPPROTO glue(name, SUFFIX) (void)\
236
#define SSE_HELPER_B(name, F)\
237
void glue(name, SUFFIX) (Reg *d, Reg *s)\
276 238
{\
277
    Reg *d, *s;\
278
    d = (Reg *)((char *)env + PARAM1);\
279
    s = (Reg *)((char *)env + PARAM2);\
280 239
    d->B(0) = F(d->B(0), s->B(0));\
281 240
    d->B(1) = F(d->B(1), s->B(1));\
282 241
    d->B(2) = F(d->B(2), s->B(2));\
......
297 256
    )\
298 257
}
299 258

  
300
#define SSE_OP_W(name, F)\
301
void OPPROTO glue(name, SUFFIX) (void)\
259
#define SSE_HELPER_W(name, F)\
260
void glue(name, SUFFIX) (Reg *d, Reg *s)\
302 261
{\
303
    Reg *d, *s;\
304
    d = (Reg *)((char *)env + PARAM1);\
305
    s = (Reg *)((char *)env + PARAM2);\
306 262
    d->W(0) = F(d->W(0), s->W(0));\
307 263
    d->W(1) = F(d->W(1), s->W(1));\
308 264
    d->W(2) = F(d->W(2), s->W(2));\
......
315 271
    )\
316 272
}
317 273

  
318
#define SSE_OP_L(name, F)\
319
void OPPROTO glue(name, SUFFIX) (void)\
274
#define SSE_HELPER_L(name, F)\
275
void glue(name, SUFFIX) (Reg *d, Reg *s)\
320 276
{\
321
    Reg *d, *s;\
322
    d = (Reg *)((char *)env + PARAM1);\
323
    s = (Reg *)((char *)env + PARAM2);\
324 277
    d->L(0) = F(d->L(0), s->L(0));\
325 278
    d->L(1) = F(d->L(1), s->L(1));\
326 279
    XMM_ONLY(\
......
329 282
    )\
330 283
}
331 284

  
332
#define SSE_OP_Q(name, F)\
333
void OPPROTO glue(name, SUFFIX) (void)\
285
#define SSE_HELPER_Q(name, F)\
286
void glue(name, SUFFIX) (Reg *d, Reg *s)\
334 287
{\
335
    Reg *d, *s;\
336
    d = (Reg *)((char *)env + PARAM1);\
337
    s = (Reg *)((char *)env + PARAM2);\
338 288
    d->Q(0) = F(d->Q(0), s->Q(0));\
339 289
    XMM_ONLY(\
340 290
    d->Q(1) = F(d->Q(1), s->Q(1));\
......
416 366
/* Average of a and b, rounded up: (a + b + 1) >> 1.
 * The whole expansion is parenthesized so the macro can be embedded in a
 * larger expression without the shift capturing neighbouring operators. */
#define FAVG(a, b) (((a) + (b) + 1) >> 1)
417 367
#endif
418 368

  
419
SSE_OP_B(op_paddb, FADD)
420
SSE_OP_W(op_paddw, FADD)
421
SSE_OP_L(op_paddl, FADD)
422
SSE_OP_Q(op_paddq, FADD)
369
SSE_HELPER_B(helper_paddb, FADD)
370
SSE_HELPER_W(helper_paddw, FADD)
371
SSE_HELPER_L(helper_paddl, FADD)
372
SSE_HELPER_Q(helper_paddq, FADD)
423 373

  
424
SSE_OP_B(op_psubb, FSUB)
425
SSE_OP_W(op_psubw, FSUB)
426
SSE_OP_L(op_psubl, FSUB)
427
SSE_OP_Q(op_psubq, FSUB)
374
SSE_HELPER_B(helper_psubb, FSUB)
375
SSE_HELPER_W(helper_psubw, FSUB)
376
SSE_HELPER_L(helper_psubl, FSUB)
377
SSE_HELPER_Q(helper_psubq, FSUB)
428 378

  
429
SSE_OP_B(op_paddusb, FADDUB)
430
SSE_OP_B(op_paddsb, FADDSB)
431
SSE_OP_B(op_psubusb, FSUBUB)
432
SSE_OP_B(op_psubsb, FSUBSB)
379
SSE_HELPER_B(helper_paddusb, FADDUB)
380
SSE_HELPER_B(helper_paddsb, FADDSB)
381
SSE_HELPER_B(helper_psubusb, FSUBUB)
382
SSE_HELPER_B(helper_psubsb, FSUBSB)
433 383

  
434
SSE_OP_W(op_paddusw, FADDUW)
435
SSE_OP_W(op_paddsw, FADDSW)
436
SSE_OP_W(op_psubusw, FSUBUW)
437
SSE_OP_W(op_psubsw, FSUBSW)
384
SSE_HELPER_W(helper_paddusw, FADDUW)
385
SSE_HELPER_W(helper_paddsw, FADDSW)
386
SSE_HELPER_W(helper_psubusw, FSUBUW)
387
SSE_HELPER_W(helper_psubsw, FSUBSW)
438 388

  
439
SSE_OP_B(op_pminub, FMINUB)
440
SSE_OP_B(op_pmaxub, FMAXUB)
389
SSE_HELPER_B(helper_pminub, FMINUB)
390
SSE_HELPER_B(helper_pmaxub, FMAXUB)
441 391

  
442
SSE_OP_W(op_pminsw, FMINSW)
443
SSE_OP_W(op_pmaxsw, FMAXSW)
392
SSE_HELPER_W(helper_pminsw, FMINSW)
393
SSE_HELPER_W(helper_pmaxsw, FMAXSW)
444 394

  
445
SSE_OP_Q(op_pand, FAND)
446
SSE_OP_Q(op_pandn, FANDN)
447
SSE_OP_Q(op_por, FOR)
448
SSE_OP_Q(op_pxor, FXOR)
395
SSE_HELPER_Q(helper_pand, FAND)
396
SSE_HELPER_Q(helper_pandn, FANDN)
397
SSE_HELPER_Q(helper_por, FOR)
398
SSE_HELPER_Q(helper_pxor, FXOR)
449 399

  
450
SSE_OP_B(op_pcmpgtb, FCMPGTB)
451
SSE_OP_W(op_pcmpgtw, FCMPGTW)
452
SSE_OP_L(op_pcmpgtl, FCMPGTL)
400
SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
401
SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
402
SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
453 403

  
454
SSE_OP_B(op_pcmpeqb, FCMPEQ)
455
SSE_OP_W(op_pcmpeqw, FCMPEQ)
456
SSE_OP_L(op_pcmpeql, FCMPEQ)
404
SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
405
SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
406
SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
457 407

  
458
SSE_OP_W(op_pmullw, FMULLW)
408
SSE_HELPER_W(helper_pmullw, FMULLW)
459 409
#if SHIFT == 0
460
SSE_OP_W(op_pmulhrw, FMULHRW)
410
SSE_HELPER_W(helper_pmulhrw, FMULHRW)
461 411
#endif
462
SSE_OP_W(op_pmulhuw, FMULHUW)
463
SSE_OP_W(op_pmulhw, FMULHW)
412
SSE_HELPER_W(helper_pmulhuw, FMULHUW)
413
SSE_HELPER_W(helper_pmulhw, FMULHW)
464 414

  
465
SSE_OP_B(op_pavgb, FAVG)
466
SSE_OP_W(op_pavgw, FAVG)
415
SSE_HELPER_B(helper_pavgb, FAVG)
416
SSE_HELPER_W(helper_pavgw, FAVG)
467 417

  
468
void OPPROTO glue(op_pmuludq, SUFFIX) (void)
418
void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
469 419
{
470
    Reg *d, *s;
471
    d = (Reg *)((char *)env + PARAM1);
472
    s = (Reg *)((char *)env + PARAM2);
473

  
474 420
    d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
475 421
#if SHIFT == 1
476 422
    d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
477 423
#endif
478 424
}
479 425

  
480
void OPPROTO glue(op_pmaddwd, SUFFIX) (void)
426
void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
481 427
{
482 428
    int i;
483
    Reg *d, *s;
484
    d = (Reg *)((char *)env + PARAM1);
485
    s = (Reg *)((char *)env + PARAM2);
486 429

  
487 430
    for(i = 0; i < (2 << SHIFT); i++) {
488 431
        d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
......
500 443
        return a;
501 444
}
502 445
#endif
503
void OPPROTO glue(op_psadbw, SUFFIX) (void)
446
void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
504 447
{
505 448
    unsigned int val;
506
    Reg *d, *s;
507
    d = (Reg *)((char *)env + PARAM1);
508
    s = (Reg *)((char *)env + PARAM2);
509 449

  
510 450
    val = 0;
511 451
    val += abs1(d->B(0) - s->B(0));
......
531 471
#endif
532 472
}
533 473

  
534
void OPPROTO glue(op_maskmov, SUFFIX) (void)
474
void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s)
535 475
{
536 476
    int i;
537
    Reg *d, *s;
538
    d = (Reg *)((char *)env + PARAM1);
539
    s = (Reg *)((char *)env + PARAM2);
540 477
    for(i = 0; i < (8 << SHIFT); i++) {
541 478
        if (s->B(i) & 0x80)
542 479
            stb(A0 + i, d->B(i));
......
544 481
    FORCE_RET();
545 482
}
546 483

  
547
void OPPROTO glue(op_movl_mm_T0, SUFFIX) (void)
484
void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
548 485
{
549
    Reg *d;
550
    d = (Reg *)((char *)env + PARAM1);
551
    d->L(0) = T0;
486
    d->L(0) = val;
552 487
    d->L(1) = 0;
553 488
#if SHIFT == 1
554 489
    d->Q(1) = 0;
555 490
#endif
556 491
}
557 492

  
558
void OPPROTO glue(op_movl_T0_mm, SUFFIX) (void)
559
{
560
    Reg *s;
561
    s = (Reg *)((char *)env + PARAM1);
562
    T0 = s->L(0);
563
}
564

  
565 493
#ifdef TARGET_X86_64
566
void OPPROTO glue(op_movq_mm_T0, SUFFIX) (void)
494
void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
567 495
{
568
    Reg *d;
569
    d = (Reg *)((char *)env + PARAM1);
570
    d->Q(0) = T0;
496
    d->Q(0) = val;
571 497
#if SHIFT == 1
572 498
    d->Q(1) = 0;
573 499
#endif
574 500
}
575

  
576
void OPPROTO glue(op_movq_T0_mm, SUFFIX) (void)
577
{
578
    Reg *s;
579
    s = (Reg *)((char *)env + PARAM1);
580
    T0 = s->Q(0);
581
}
582 501
#endif
583 502

  
584 503
#if SHIFT == 0
585
void OPPROTO glue(op_pshufw, SUFFIX) (void)
504
void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
586 505
{
587
    Reg r, *d, *s;
588
    int order;
589
    d = (Reg *)((char *)env + PARAM1);
590
    s = (Reg *)((char *)env + PARAM2);
591
    order = PARAM3;
506
    Reg r;
592 507
    r.W(0) = s->W(order & 3);
593 508
    r.W(1) = s->W((order >> 2) & 3);
594 509
    r.W(2) = s->W((order >> 4) & 3);
......
596 511
    *d = r;
597 512
}
598 513
#else
599
void OPPROTO op_shufps(void)
514
void helper_shufps(Reg *d, Reg *s, int order)
600 515
{
601
    Reg r, *d, *s;
602
    int order;
603
    d = (Reg *)((char *)env + PARAM1);
604
    s = (Reg *)((char *)env + PARAM2);
605
    order = PARAM3;
516
    Reg r;
606 517
    r.L(0) = d->L(order & 3);
607 518
    r.L(1) = d->L((order >> 2) & 3);
608 519
    r.L(2) = s->L((order >> 4) & 3);
......
610 521
    *d = r;
611 522
}
612 523

  
613
void OPPROTO op_shufpd(void)
524
void helper_shufpd(Reg *d, Reg *s, int order)
614 525
{
615
    Reg r, *d, *s;
616
    int order;
617
    d = (Reg *)((char *)env + PARAM1);
618
    s = (Reg *)((char *)env + PARAM2);
619
    order = PARAM3;
526
    Reg r;
620 527
    r.Q(0) = d->Q(order & 1);
621 528
    r.Q(1) = s->Q((order >> 1) & 1);
622 529
    *d = r;
623 530
}
624 531

  
625
void OPPROTO glue(op_pshufd, SUFFIX) (void)
532
void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
626 533
{
627
    Reg r, *d, *s;
628
    int order;
629
    d = (Reg *)((char *)env + PARAM1);
630
    s = (Reg *)((char *)env + PARAM2);
631
    order = PARAM3;
534
    Reg r;
632 535
    r.L(0) = s->L(order & 3);
633 536
    r.L(1) = s->L((order >> 2) & 3);
634 537
    r.L(2) = s->L((order >> 4) & 3);
......
636 539
    *d = r;
637 540
}
638 541

  
639
void OPPROTO glue(op_pshuflw, SUFFIX) (void)
542
void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
640 543
{
641
    Reg r, *d, *s;
642
    int order;
643
    d = (Reg *)((char *)env + PARAM1);
644
    s = (Reg *)((char *)env + PARAM2);
645
    order = PARAM3;
544
    Reg r;
646 545
    r.W(0) = s->W(order & 3);
647 546
    r.W(1) = s->W((order >> 2) & 3);
648 547
    r.W(2) = s->W((order >> 4) & 3);
......
651 550
    *d = r;
652 551
}
653 552

  
654
void OPPROTO glue(op_pshufhw, SUFFIX) (void)
553
void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
655 554
{
656
    Reg r, *d, *s;
657
    int order;
658
    d = (Reg *)((char *)env + PARAM1);
659
    s = (Reg *)((char *)env + PARAM2);
660
    order = PARAM3;
555
    Reg r;
661 556
    r.Q(0) = s->Q(0);
662 557
    r.W(4) = s->W(4 + (order & 3));
663 558
    r.W(5) = s->W(4 + ((order >> 2) & 3));
......
671 566
/* FPU ops */
672 567
/* XXX: not accurate */
673 568

  
674
#define SSE_OP_S(name, F)\
675
void OPPROTO op_ ## name ## ps (void)\
569
#define SSE_HELPER_S(name, F)\
570
void helper_ ## name ## ps (Reg *d, Reg *s)\
676 571
{\
677
    Reg *d, *s;\
678
    d = (Reg *)((char *)env + PARAM1);\
679
    s = (Reg *)((char *)env + PARAM2);\
680 572
    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
681 573
    d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
682 574
    d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
683 575
    d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
684 576
}\
685 577
\
686
void OPPROTO op_ ## name ## ss (void)\
578
void helper_ ## name ## ss (Reg *d, Reg *s)\
687 579
{\
688
    Reg *d, *s;\
689
    d = (Reg *)((char *)env + PARAM1);\
690
    s = (Reg *)((char *)env + PARAM2);\
691 580
    d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
692 581
}\
693
void OPPROTO op_ ## name ## pd (void)\
582
void helper_ ## name ## pd (Reg *d, Reg *s)\
694 583
{\
695
    Reg *d, *s;\
696
    d = (Reg *)((char *)env + PARAM1);\
697
    s = (Reg *)((char *)env + PARAM2);\
698 584
    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
699 585
    d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
700 586
}\
701 587
\
702
void OPPROTO op_ ## name ## sd (void)\
588
void helper_ ## name ## sd (Reg *d, Reg *s)\
703 589
{\
704
    Reg *d, *s;\
705
    d = (Reg *)((char *)env + PARAM1);\
706
    s = (Reg *)((char *)env + PARAM2);\
707 590
    d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
708 591
}
709 592

  
......
715 598
/* Yields a when (a) > (b) holds, otherwise b; `size` is not used.
 * Each argument may be evaluated twice, so pass side-effect-free operands.
 * The expansion is parenthesized so it behaves as a single operand when
 * placed inside a larger expression. */
#define FPU_MAX(size, a, b) ((a) > (b) ? (a) : (b))
716 599
#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
717 600

  
718
SSE_OP_S(add, FPU_ADD)
719
SSE_OP_S(sub, FPU_SUB)
720
SSE_OP_S(mul, FPU_MUL)
721
SSE_OP_S(div, FPU_DIV)
722
SSE_OP_S(min, FPU_MIN)
723
SSE_OP_S(max, FPU_MAX)
724
SSE_OP_S(sqrt, FPU_SQRT)
601
SSE_HELPER_S(add, FPU_ADD)
602
SSE_HELPER_S(sub, FPU_SUB)
603
SSE_HELPER_S(mul, FPU_MUL)
604
SSE_HELPER_S(div, FPU_DIV)
605
SSE_HELPER_S(min, FPU_MIN)
606
SSE_HELPER_S(max, FPU_MAX)
607
SSE_HELPER_S(sqrt, FPU_SQRT)
725 608

  
726 609

  
727 610
/* float to float conversions */
728
void OPPROTO op_cvtps2pd(void)
611
void helper_cvtps2pd(Reg *d, Reg *s)
729 612
{
730 613
    float32 s0, s1;
731
    Reg *d, *s;
732
    d = (Reg *)((char *)env + PARAM1);
733
    s = (Reg *)((char *)env + PARAM2);
734 614
    s0 = s->XMM_S(0);
735 615
    s1 = s->XMM_S(1);
736 616
    d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
737 617
    d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
738 618
}
739 619

  
740
void OPPROTO op_cvtpd2ps(void)
620
void helper_cvtpd2ps(Reg *d, Reg *s)
741 621
{
742
    Reg *d, *s;
743
    d = (Reg *)((char *)env + PARAM1);
744
    s = (Reg *)((char *)env + PARAM2);
745 622
    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
746 623
    d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
747 624
    d->Q(1) = 0;
748 625
}
749 626

  
750
void OPPROTO op_cvtss2sd(void)
627
void helper_cvtss2sd(Reg *d, Reg *s)
751 628
{
752
    Reg *d, *s;
753
    d = (Reg *)((char *)env + PARAM1);
754
    s = (Reg *)((char *)env + PARAM2);
755 629
    d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
756 630
}
757 631

  
758
void OPPROTO op_cvtsd2ss(void)
632
void helper_cvtsd2ss(Reg *d, Reg *s)
759 633
{
760
    Reg *d, *s;
761
    d = (Reg *)((char *)env + PARAM1);
762
    s = (Reg *)((char *)env + PARAM2);
763 634
    d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
764 635
}
765 636

  
766 637
/* integer to float */
767
void OPPROTO op_cvtdq2ps(void)
638
void helper_cvtdq2ps(Reg *d, Reg *s)
768 639
{
769
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
770
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
771 640
    d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
772 641
    d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
773 642
    d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
774 643
    d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
775 644
}
776 645

  
777
void OPPROTO op_cvtdq2pd(void)
646
void helper_cvtdq2pd(Reg *d, Reg *s)
778 647
{
779
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
780
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
781 648
    int32_t l0, l1;
782 649
    l0 = (int32_t)s->XMM_L(0);
783 650
    l1 = (int32_t)s->XMM_L(1);
......
785 652
    d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
786 653
}
787 654

  
788
void OPPROTO op_cvtpi2ps(void)
655
void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
789 656
{
790
    XMMReg *d = (Reg *)((char *)env + PARAM1);
791
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
792 657
    d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
793 658
    d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
794 659
}
795 660

  
796
void OPPROTO op_cvtpi2pd(void)
661
void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
797 662
{
798
    XMMReg *d = (Reg *)((char *)env + PARAM1);
799
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
800 663
    d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
801 664
    d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
802 665
}
803 666

  
804
void OPPROTO op_cvtsi2ss(void)
667
void helper_cvtsi2ss(XMMReg *d, uint32_t val)
805 668
{
806
    XMMReg *d = (Reg *)((char *)env + PARAM1);
807
    d->XMM_S(0) = int32_to_float32(T0, &env->sse_status);
669
    d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
808 670
}
809 671

  
810
void OPPROTO op_cvtsi2sd(void)
672
void helper_cvtsi2sd(XMMReg *d, uint32_t val)
811 673
{
812
    XMMReg *d = (Reg *)((char *)env + PARAM1);
813
    d->XMM_D(0) = int32_to_float64(T0, &env->sse_status);
674
    d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
814 675
}
815 676

  
816 677
#ifdef TARGET_X86_64
817
void OPPROTO op_cvtsq2ss(void)
678
void helper_cvtsq2ss(XMMReg *d, uint64_t val)
818 679
{
819
    XMMReg *d = (Reg *)((char *)env + PARAM1);
820
    d->XMM_S(0) = int64_to_float32(T0, &env->sse_status);
680
    d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
821 681
}
822 682

  
823
void OPPROTO op_cvtsq2sd(void)
683
void helper_cvtsq2sd(XMMReg *d, uint64_t val)
824 684
{
825
    XMMReg *d = (Reg *)((char *)env + PARAM1);
826
    d->XMM_D(0) = int64_to_float64(T0, &env->sse_status);
685
    d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
827 686
}
828 687
#endif
829 688

  
830 689
/* float to integer */
831
void OPPROTO op_cvtps2dq(void)
690
void helper_cvtps2dq(XMMReg *d, XMMReg *s)
832 691
{
833
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
834
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
835 692
    d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
836 693
    d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
837 694
    d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
838 695
    d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
839 696
}
840 697

  
841
void OPPROTO op_cvtpd2dq(void)
698
void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
842 699
{
843
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
844
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
845 700
    d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
846 701
    d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
847 702
    d->XMM_Q(1) = 0;
848 703
}
849 704

  
850
void OPPROTO op_cvtps2pi(void)
705
void helper_cvtps2pi(MMXReg *d, XMMReg *s)
851 706
{
852
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
853
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
854 707
    d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
855 708
    d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
856 709
}
857 710

  
858
void OPPROTO op_cvtpd2pi(void)
711
void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
859 712
{
860
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
861
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
862 713
    d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
863 714
    d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
864 715
}
865 716

  
866
void OPPROTO op_cvtss2si(void)
717
int32_t helper_cvtss2si(XMMReg *s)
867 718
{
868
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
869
    T0 = float32_to_int32(s->XMM_S(0), &env->sse_status);
719
    return float32_to_int32(s->XMM_S(0), &env->sse_status);
870 720
}
871 721

  
872
void OPPROTO op_cvtsd2si(void)
722
int32_t helper_cvtsd2si(XMMReg *s)
873 723
{
874
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
875
    T0 = float64_to_int32(s->XMM_D(0), &env->sse_status);
724
    return float64_to_int32(s->XMM_D(0), &env->sse_status);
876 725
}
877 726

  
878 727
#ifdef TARGET_X86_64
879
void OPPROTO op_cvtss2sq(void)
728
int64_t helper_cvtss2sq(XMMReg *s)
880 729
{
881
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
882
    T0 = float32_to_int64(s->XMM_S(0), &env->sse_status);
730
    return float32_to_int64(s->XMM_S(0), &env->sse_status);
883 731
}
884 732

  
885
void OPPROTO op_cvtsd2sq(void)
733
int64_t helper_cvtsd2sq(XMMReg *s)
886 734
{
887
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
888
    T0 = float64_to_int64(s->XMM_D(0), &env->sse_status);
735
    return float64_to_int64(s->XMM_D(0), &env->sse_status);
889 736
}
890 737
#endif
891 738

  
892 739
/* float to integer truncated */
893
void OPPROTO op_cvttps2dq(void)
740
void helper_cvttps2dq(XMMReg *d, XMMReg *s)
894 741
{
895
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
896
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
897 742
    d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
898 743
    d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
899 744
    d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
900 745
    d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
901 746
}
902 747

  
903
void OPPROTO op_cvttpd2dq(void)
748
void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
904 749
{
905
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
906
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
907 750
    d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
908 751
    d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
909 752
    d->XMM_Q(1) = 0;
910 753
}
911 754

  
912
void OPPROTO op_cvttps2pi(void)
755
void helper_cvttps2pi(MMXReg *d, XMMReg *s)
913 756
{
914
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
915
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
916 757
    d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
917 758
    d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
918 759
}
919 760

  
920
void OPPROTO op_cvttpd2pi(void)
761
void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
921 762
{
922
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
923
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
924 763
    d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
925 764
    d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
926 765
}
927 766

  
928
void OPPROTO op_cvttss2si(void)
767
int32_t helper_cvttss2si(XMMReg *s)
929 768
{
930
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
931
    T0 = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
769
    return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
932 770
}
933 771

  
934
void OPPROTO op_cvttsd2si(void)
772
int32_t helper_cvttsd2si(XMMReg *s)
935 773
{
936
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
937
    T0 = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
774
    return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
938 775
}
939 776

  
940 777
#ifdef TARGET_X86_64
941
void OPPROTO op_cvttss2sq(void)
778
int64_t helper_cvttss2sq(XMMReg *s)
942 779
{
943
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
944
    T0 = float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
780
    return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
945 781
}
946 782

  
947
void OPPROTO op_cvttsd2sq(void)
783
int64_t helper_cvttsd2sq(XMMReg *s)
948 784
{
949
    XMMReg *s = (XMMReg *)((char *)env + PARAM1);
950
    T0 = float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
785
    return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
951 786
}
952 787
#endif
953 788

  
954
void OPPROTO op_rsqrtps(void)
789
void helper_rsqrtps(XMMReg *d, XMMReg *s)
955 790
{
956
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
957
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
958 791
    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
959 792
    d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
960 793
    d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
961 794
    d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
962 795
}
963 796

  
964
void OPPROTO op_rsqrtss(void)
797
void helper_rsqrtss(XMMReg *d, XMMReg *s)
965 798
{
966
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
967
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
968 799
    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
969 800
}
970 801

  
971
void OPPROTO op_rcpps(void)
802
void helper_rcpps(XMMReg *d, XMMReg *s)
972 803
{
973
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
974
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
975 804
    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
976 805
    d->XMM_S(1) = approx_rcp(s->XMM_S(1));
977 806
    d->XMM_S(2) = approx_rcp(s->XMM_S(2));
978 807
    d->XMM_S(3) = approx_rcp(s->XMM_S(3));
979 808
}
980 809

  
981
void OPPROTO op_rcpss(void)
810
void helper_rcpss(XMMReg *d, XMMReg *s)
982 811
{
983
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
984
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
985 812
    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
986 813
}
987 814

  
988
void OPPROTO op_haddps(void)
815
void helper_haddps(XMMReg *d, XMMReg *s)
989 816
{
990
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
991
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
992 817
    XMMReg r;
993 818
    r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
994 819
    r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
......
997 822
    *d = r;
998 823
}
999 824

  
1000
void OPPROTO op_haddpd(void)
825
void helper_haddpd(XMMReg *d, XMMReg *s)
1001 826
{
1002
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1003
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1004 827
    XMMReg r;
1005 828
    r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
1006 829
    r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
1007 830
    *d = r;
1008 831
}
1009 832

  
1010
void OPPROTO op_hsubps(void)
833
void helper_hsubps(XMMReg *d, XMMReg *s)
1011 834
{
1012
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1013
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1014 835
    XMMReg r;
1015 836
    r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
1016 837
    r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
......
1019 840
    *d = r;
1020 841
}
1021 842

  
1022
void OPPROTO op_hsubpd(void)
843
void helper_hsubpd(XMMReg *d, XMMReg *s)
1023 844
{
1024
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1025
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1026 845
    XMMReg r;
1027 846
    r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
1028 847
    r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
1029 848
    *d = r;
1030 849
}
1031 850

  
1032
void OPPROTO op_addsubps(void)
851
void helper_addsubps(XMMReg *d, XMMReg *s)
1033 852
{
1034
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1035
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1036 853
    d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
1037 854
    d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
1038 855
    d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
1039 856
    d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
1040 857
}
1041 858

  
1042
void OPPROTO op_addsubpd(void)
859
void helper_addsubpd(XMMReg *d, XMMReg *s)
1043 860
{
1044
    XMMReg *d = (XMMReg *)((char *)env + PARAM1);
1045
    XMMReg *s = (XMMReg *)((char *)env + PARAM2);
1046 861
    d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
1047 862
    d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
1048 863
}
1049 864

  
1050 865
/* XXX: unordered */
1051
#define SSE_OP_CMP(name, F)\
1052
void OPPROTO op_ ## name ## ps (void)\
866
#define SSE_HELPER_CMP(name, F)\
867
void helper_ ## name ## ps (Reg *d, Reg *s)\
1053 868
{\
1054
    Reg *d, *s;\
1055
    d = (Reg *)((char *)env + PARAM1);\
1056
    s = (Reg *)((char *)env + PARAM2);\
1057 869
    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1058 870
    d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
1059 871
    d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
1060 872
    d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
1061 873
}\
1062 874
\
1063
void OPPROTO op_ ## name ## ss (void)\
875
void helper_ ## name ## ss (Reg *d, Reg *s)\
1064 876
{\
1065
    Reg *d, *s;\
1066
    d = (Reg *)((char *)env + PARAM1);\
1067
    s = (Reg *)((char *)env + PARAM2);\
1068 877
    d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
1069 878
}\
1070
void OPPROTO op_ ## name ## pd (void)\
879
void helper_ ## name ## pd (Reg *d, Reg *s)\
1071 880
{\
1072
    Reg *d, *s;\
1073
    d = (Reg *)((char *)env + PARAM1);\
1074
    s = (Reg *)((char *)env + PARAM2);\
1075 881
    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1076 882
    d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
1077 883
}\
1078 884
\
1079
void OPPROTO op_ ## name ## sd (void)\
885
void helper_ ## name ## sd (Reg *d, Reg *s)\
1080 886
{\
1081
    Reg *d, *s;\
1082
    d = (Reg *)((char *)env + PARAM1);\
1083
    s = (Reg *)((char *)env + PARAM2);\
1084 887
    d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
1085 888
}
1086 889

  
......
1093 896
/* Yields 0 when float<size>_le(a, b, &env->sse_status) is true, -1 otherwise.
 * Parenthesized so the conditional cannot merge with surrounding operators. */
#define FPU_CMPNLE(size, a, b) (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
1094 897
#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
1095 898

  
1096
SSE_OP_CMP(cmpeq, FPU_CMPEQ)
1097
SSE_OP_CMP(cmplt, FPU_CMPLT)
1098
SSE_OP_CMP(cmple, FPU_CMPLE)
1099
SSE_OP_CMP(cmpunord, FPU_CMPUNORD)
1100
SSE_OP_CMP(cmpneq, FPU_CMPNEQ)
1101
SSE_OP_CMP(cmpnlt, FPU_CMPNLT)
1102
SSE_OP_CMP(cmpnle, FPU_CMPNLE)
1103
SSE_OP_CMP(cmpord, FPU_CMPORD)
899
SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
900
SSE_HELPER_CMP(cmplt, FPU_CMPLT)
901
SSE_HELPER_CMP(cmple, FPU_CMPLE)
902
SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
903
SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
904
SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
905
SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
906
SSE_HELPER_CMP(cmpord, FPU_CMPORD)
1104 907

  
1105 908
const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
1106 909

  
1107
void OPPROTO op_ucomiss(void)
910
void helper_ucomiss(Reg *d, Reg *s)
1108 911
{
1109 912
    int ret;
1110 913
    float32 s0, s1;
1111
    Reg *d, *s;
1112
    d = (Reg *)((char *)env + PARAM1);
1113
    s = (Reg *)((char *)env + PARAM2);
1114 914

  
1115 915
    s0 = d->XMM_S(0);
1116 916
    s1 = s->XMM_S(0);
......
1119 919
    FORCE_RET();
1120 920
}
1121 921

  
1122
void OPPROTO op_comiss(void)
922
void helper_comiss(Reg *d, Reg *s)
1123 923
{
1124 924
    int ret;
1125 925
    float32 s0, s1;
1126
    Reg *d, *s;
1127
    d = (Reg *)((char *)env + PARAM1);
1128
    s = (Reg *)((char *)env + PARAM2);
1129 926

  
1130 927
    s0 = d->XMM_S(0);
1131 928
    s1 = s->XMM_S(0);
......
1134 931
    FORCE_RET();
1135 932
}
1136 933

  
1137
void OPPROTO op_ucomisd(void)
934
void helper_ucomisd(Reg *d, Reg *s)
1138 935
{
1139 936
    int ret;
1140 937
    float64 d0, d1;
1141
    Reg *d, *s;
1142
    d = (Reg *)((char *)env + PARAM1);
1143
    s = (Reg *)((char *)env + PARAM2);
1144 938

  
1145 939
    d0 = d->XMM_D(0);
1146 940
    d1 = s->XMM_D(0);
......
1149 943
    FORCE_RET();
1150 944
}
1151 945

  
1152
void OPPROTO op_comisd(void)
946
void helper_comisd(Reg *d, Reg *s)
1153 947
{
1154 948
    int ret;
1155 949
    float64 d0, d1;
1156
    Reg *d, *s;
1157
    d = (Reg *)((char *)env + PARAM1);
1158
    s = (Reg *)((char *)env + PARAM2);
1159 950

  
1160 951
    d0 = d->XMM_D(0);
1161 952
    d1 = s->XMM_D(0);
......
1164 955
    FORCE_RET();
1165 956
}
1166 957

  
1167
void OPPROTO op_movmskps(void)
958
uint32_t helper_movmskps(Reg *s)
1168 959
{
1169 960
    int b0, b1, b2, b3;
1170
    Reg *s;
1171
    s = (Reg *)((char *)env + PARAM1);
1172 961
    b0 = s->XMM_L(0) >> 31;
1173 962
    b1 = s->XMM_L(1) >> 31;
1174 963
    b2 = s->XMM_L(2) >> 31;
1175 964
    b3 = s->XMM_L(3) >> 31;
1176
    T0 = b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
965
    return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
1177 966
}
1178 967

  
1179
void OPPROTO op_movmskpd(void)
968
uint32_t helper_movmskpd(Reg *s)
1180 969
{
1181 970
    int b0, b1;
1182
    Reg *s;
1183
    s = (Reg *)((char *)env + PARAM1);
1184 971
    b0 = s->XMM_L(1) >> 31;
1185 972
    b1 = s->XMM_L(3) >> 31;
1186
    T0 = b0 | (b1 << 1);
973
    return b0 | (b1 << 1);
1187 974
}
1188 975

  
1189 976
#endif
1190 977

  
1191
void OPPROTO glue(op_pmovmskb, SUFFIX)(void)
1192
{
1193
    Reg *s;
1194
    s = (Reg *)((char *)env + PARAM1);
1195
    T0 = 0;
1196
    T0 |= (s->XMM_B(0) >> 7);
1197
    T0 |= (s->XMM_B(1) >> 6) & 0x02;
1198
    T0 |= (s->XMM_B(2) >> 5) & 0x04;
1199
    T0 |= (s->XMM_B(3) >> 4) & 0x08;
1200
    T0 |= (s->XMM_B(4) >> 3) & 0x10;
1201
    T0 |= (s->XMM_B(5) >> 2) & 0x20;
1202
    T0 |= (s->XMM_B(6) >> 1) & 0x40;
1203
    T0 |= (s->XMM_B(7)) & 0x80;
978
uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
979
{
980
    uint32_t val;
981
    val = 0;
982
    val |= (s->XMM_B(0) >> 7);
983
    val |= (s->XMM_B(1) >> 6) & 0x02;
984
    val |= (s->XMM_B(2) >> 5) & 0x04;
985
    val |= (s->XMM_B(3) >> 4) & 0x08;
986
    val |= (s->XMM_B(4) >> 3) & 0x10;
987
    val |= (s->XMM_B(5) >> 2) & 0x20;
988
    val |= (s->XMM_B(6) >> 1) & 0x40;
989
    val |= (s->XMM_B(7)) & 0x80;
1204 990
#if SHIFT == 1
1205
    T0 |= (s->XMM_B(8) << 1) & 0x0100;
1206
    T0 |= (s->XMM_B(9) << 2) & 0x0200;
1207
    T0 |= (s->XMM_B(10) << 3) & 0x0400;
1208
    T0 |= (s->XMM_B(11) << 4) & 0x0800;
1209
    T0 |= (s->XMM_B(12) << 5) & 0x1000;
1210
    T0 |= (s->XMM_B(13) << 6) & 0x2000;
1211
    T0 |= (s->XMM_B(14) << 7) & 0x4000;
1212
    T0 |= (s->XMM_B(15) << 8) & 0x8000;
991
    val |= (s->XMM_B(8) << 1) & 0x0100;
992
    val |= (s->XMM_B(9) << 2) & 0x0200;
993
    val |= (s->XMM_B(10) << 3) & 0x0400;
994
    val |= (s->XMM_B(11) << 4) & 0x0800;
995
    val |= (s->XMM_B(12) << 5) & 0x1000;
996
    val |= (s->XMM_B(13) << 6) & 0x2000;
997
    val |= (s->XMM_B(14) << 7) & 0x4000;
998
    val |= (s->XMM_B(15) << 8) & 0x8000;
1213 999
#endif
1000
    return val;
1214 1001
}
1215 1002

  
1216
void OPPROTO glue(op_pinsrw, SUFFIX) (void)
1217
{
1218
    Reg *d = (Reg *)((char *)env + PARAM1);
1219
    int pos = PARAM2;
1220

  
1221
    d->W(pos) = T0;
1222
}
1223

  
1224
void OPPROTO glue(op_pextrw, SUFFIX) (void)
1225
{
1226
    Reg *s = (Reg *)((char *)env + PARAM1);
1227
    int pos = PARAM2;
1228

  
1229
    T0 = s->W(pos);
1230
}
1231

  
1232
void OPPROTO glue(op_packsswb, SUFFIX) (void)
1003
void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
1233 1004
{
1234
    Reg r, *d, *s;
1235
    d = (Reg *)((char *)env + PARAM1);
1236
    s = (Reg *)((char *)env + PARAM2);
1005
    Reg r;
1237 1006

  
1238 1007
    r.B(0) = satsb((int16_t)d->W(0));
1239 1008
    r.B(1) = satsb((int16_t)d->W(1));
......
1258 1027
    *d = r;
1259 1028
}
1260 1029

  
1261
void OPPROTO glue(op_packuswb, SUFFIX) (void)
1030
void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
1262 1031
{
1263
    Reg r, *d, *s;
1264
    d = (Reg *)((char *)env + PARAM1);
1265
    s = (Reg *)((char *)env + PARAM2);
1032
    Reg r;
1266 1033

  
1267 1034
    r.B(0) = satub((int16_t)d->W(0));
1268 1035
    r.B(1) = satub((int16_t)d->W(1));
......
1287 1054
    *d = r;
1288 1055
}
1289 1056

  
1290
void OPPROTO glue(op_packssdw, SUFFIX) (void)
1057
void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
1291 1058
{
1292
    Reg r, *d, *s;
1293
    d = (Reg *)((char *)env + PARAM1);
1294
    s = (Reg *)((char *)env + PARAM2);
1059
    Reg r;
1295 1060

  
1296 1061
    r.W(0) = satsw(d->L(0));
1297 1062
    r.W(1) = satsw(d->L(1));
......
1310 1075

  
1311 1076
#define UNPCK_OP(base_name, base)                               \
1312 1077
                                                                \
1313
void OPPROTO glue(op_punpck ## base_name ## bw, SUFFIX) (void)   \
1078
void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s)   \
1314 1079
{                                                               \
1315
    Reg r, *d, *s;                                              \
1316
    d = (Reg *)((char *)env + PARAM1);                          \
1317
    s = (Reg *)((char *)env + PARAM2);                          \
1080
    Reg r;                                              \
1318 1081
                                                                \
1319 1082
    r.B(0) = d->B((base << (SHIFT + 2)) + 0);                   \
1320 1083
    r.B(1) = s->B((base << (SHIFT + 2)) + 0);                   \
......
1337 1100
    *d = r;                                                     \
1338 1101
}                                                               \
1339 1102
                                                                \
1340
void OPPROTO glue(op_punpck ## base_name ## wd, SUFFIX) (void)   \
1103
void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s)   \
1341 1104
{                                                               \
1342
    Reg r, *d, *s;                                              \
1343
    d = (Reg *)((char *)env + PARAM1);                          \
1344
    s = (Reg *)((char *)env + PARAM2);                          \
1105
    Reg r;                                              \
1345 1106
                                                                \
1346 1107
    r.W(0) = d->W((base << (SHIFT + 1)) + 0);                   \
1347 1108
    r.W(1) = s->W((base << (SHIFT + 1)) + 0);                   \
......
1356 1117
    *d = r;                                                     \
1357 1118
}                                                               \
1358 1119
                                                                \
1359
void OPPROTO glue(op_punpck ## base_name ## dq, SUFFIX) (void)   \
1120
void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s)   \
1360 1121
{                                                               \
1361
    Reg r, *d, *s;                                              \
1362
    d = (Reg *)((char *)env + PARAM1);                          \
1363
    s = (Reg *)((char *)env + PARAM2);                          \
1122
    Reg r;                                              \
1364 1123
                                                                \
1365 1124
    r.L(0) = d->L((base << SHIFT) + 0);                         \
1366 1125
    r.L(1) = s->L((base << SHIFT) + 0);                         \
......
1372 1131
}                                                               \
1373 1132
                                                                \
1374 1133
XMM_ONLY(                                                       \
1375
void OPPROTO glue(op_punpck ## base_name ## qdq, SUFFIX) (void)  \
1134
void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s)  \
1376 1135
{                                                               \
1377
    Reg r, *d, *s;                                              \
1378
    d = (Reg *)((char *)env + PARAM1);                          \
1379
    s = (Reg *)((char *)env + PARAM2);                          \
1136
    Reg r;                                              \
1380 1137
                                                                \
1381 1138
    r.Q(0) = d->Q(base);                                        \
1382 1139
    r.Q(1) = s->Q(base);                                        \
......
1389 1146

  
1390 1147
/* 3DNow! float ops */
1391 1148
#if SHIFT == 0
1392
void OPPROTO op_pi2fd(void)
1149
void helper_pi2fd(MMXReg *d, MMXReg *s)
1393 1150
{
1394
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1395
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1396 1151
    d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
1397 1152
    d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
1398 1153
}
1399 1154

  
1400
void OPPROTO op_pi2fw(void)
1155
void helper_pi2fw(MMXReg *d, MMXReg *s)
1401 1156
{
1402
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1403
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1404 1157
    d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
1405 1158
    d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
1406 1159
}
1407 1160

  
1408
void OPPROTO op_pf2id(void)
1161
void helper_pf2id(MMXReg *d, MMXReg *s)
1409 1162
{
1410
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1411
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1412 1163
    d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
1413 1164
    d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
1414 1165
}
1415 1166

  
1416
void OPPROTO op_pf2iw(void)
1167
void helper_pf2iw(MMXReg *d, MMXReg *s)
1417 1168
{
1418
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1419
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1420 1169
    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
1421 1170
    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
1422 1171
}
1423 1172

  
1424
void OPPROTO op_pfacc(void)
1173
void helper_pfacc(MMXReg *d, MMXReg *s)
1425 1174
{
1426
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1427
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1428 1175
    MMXReg r;
1429 1176
    r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1430 1177
    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1431 1178
    *d = r;
1432 1179
}
1433 1180

  
1434
void OPPROTO op_pfadd(void)
1181
void helper_pfadd(MMXReg *d, MMXReg *s)
1435 1182
{
1436
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1437
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1438 1183
    d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1439 1184
    d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1440 1185
}
1441 1186

  
1442
void OPPROTO op_pfcmpeq(void)
1187
void helper_pfcmpeq(MMXReg *d, MMXReg *s)
1443 1188
{
1444
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1445
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1446 1189
    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
1447 1190
    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
1448 1191
}
1449 1192

  
1450
void OPPROTO op_pfcmpge(void)
1193
void helper_pfcmpge(MMXReg *d, MMXReg *s)
1451 1194
{
1452
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1453
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1454 1195
    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1455 1196
    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1456 1197
}
1457 1198

  
1458
void OPPROTO op_pfcmpgt(void)
1199
void helper_pfcmpgt(MMXReg *d, MMXReg *s)
1459 1200
{
1460
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1461
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1462 1201
    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
1463 1202
    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
1464 1203
}
1465 1204

  
1466
void OPPROTO op_pfmax(void)
1205
void helper_pfmax(MMXReg *d, MMXReg *s)
1467 1206
{
1468
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1469
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1470 1207
    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
1471 1208
        d->MMX_S(0) = s->MMX_S(0);
1472 1209
    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
1473 1210
        d->MMX_S(1) = s->MMX_S(1);
1474 1211
}
1475 1212

  
1476
void OPPROTO op_pfmin(void)
1213
void helper_pfmin(MMXReg *d, MMXReg *s)
1477 1214
{
1478
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1479
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1480 1215
    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
1481 1216
        d->MMX_S(0) = s->MMX_S(0);
1482 1217
    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
1483 1218
        d->MMX_S(1) = s->MMX_S(1);
1484 1219
}
1485 1220

  
1486
void OPPROTO op_pfmul(void)
1221
void helper_pfmul(MMXReg *d, MMXReg *s)
1487 1222
{
1488
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1489
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1490 1223
    d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1491 1224
    d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1492 1225
}
1493 1226

  
1494
void OPPROTO op_pfnacc(void)
1227
void helper_pfnacc(MMXReg *d, MMXReg *s)
1495 1228
{
1496
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1497
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1498 1229
    MMXReg r;
1499 1230
    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1500 1231
    r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1501 1232
    *d = r;
1502 1233
}
1503 1234

  
1504
void OPPROTO op_pfpnacc(void)
1235
void helper_pfpnacc(MMXReg *d, MMXReg *s)
1505 1236
{
1506
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1507
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1508 1237
    MMXReg r;
1509 1238
    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
1510 1239
    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
1511 1240
    *d = r;
1512 1241
}
1513 1242

  
1514
void OPPROTO op_pfrcp(void)
1243
void helper_pfrcp(MMXReg *d, MMXReg *s)
1515 1244
{
1516
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1517
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1518 1245
    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
1519 1246
    d->MMX_S(1) = d->MMX_S(0);
1520 1247
}
1521 1248

  
1522
void OPPROTO op_pfrsqrt(void)
1249
void helper_pfrsqrt(MMXReg *d, MMXReg *s)
1523 1250
{
1524
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1525
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1526 1251
    d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
1527 1252
    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
1528 1253
    d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
1529 1254
    d->MMX_L(0) = d->MMX_L(1);
1530 1255
}
1531 1256

  
1532
void OPPROTO op_pfsub(void)
1257
void helper_pfsub(MMXReg *d, MMXReg *s)
1533 1258
{
1534
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1535
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1536 1259
    d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1537 1260
    d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1538 1261
}
1539 1262

  
1540
void OPPROTO op_pfsubr(void)
1263
void helper_pfsubr(MMXReg *d, MMXReg *s)
1541 1264
{
1542
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1543
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1544 1265
    d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
1545 1266
    d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
1546 1267
}
1547 1268

  
1548
void OPPROTO op_pswapd(void)
1269
void helper_pswapd(MMXReg *d, MMXReg *s)
1549 1270
{
1550
    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
1551
    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
1552 1271
    MMXReg r;
1553 1272
    r.MMX_L(0) = s->MMX_L(1);
1554 1273
    r.MMX_L(1) = s->MMX_L(0);

Also available in: Unified diff