Statistics
| Branch: | Revision:

root / target-ppc / int_helper.c @ ea6c0dac

History | View | Annotate | Download (56.5 kB)

1
/*
2
 *  PowerPC integer and vector emulation helpers for QEMU.
3
 *
4
 *  Copyright (c) 2003-2007 Jocelyn Mayer
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "cpu.h"
20
#include "host-utils.h"
21
#include "helper.h"
22

    
23
#include "helper_regs.h"
24
/*****************************************************************************/
25
/* Fixed point operations helpers */
26
#if defined(TARGET_PPC64)
27

    
28
/* multiply high word */
29
uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
30
{
31
    uint64_t tl, th;
32

    
33
    muls64(&tl, &th, arg1, arg2);
34
    return th;
35
}
36

    
37
/* multiply high word unsigned */
38
uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
39
{
40
    uint64_t tl, th;
41

    
42
    mulu64(&tl, &th, arg1, arg2);
43
    return th;
44
}
45

    
46
/*
 * mulldo: signed 64x64 multiply returning the low doubleword, with
 * overflow recording in XER.
 *
 * @env:  CPU state; XER[OV] is cleared when the product fits in 64 signed
 *        bits, otherwise XER[OV] and XER[SO] are both set.
 * @arg1: first multiplicand
 * @arg2: second multiplicand
 *
 * Returns the low 64 bits of the product.
 */
uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    uint64_t lo, hi;

    muls64(&lo, &hi, arg1, arg2);

    /*
     * The 128-bit product is representable as a signed 64-bit value only
     * when the high half is the sign extension of the low half, i.e. all
     * zeros when bit 63 of the low half is clear and all ones when it is
     * set.  Merely checking that the high half is 0 or -1 is not enough
     * (e.g. 2^62 * 2 has a zero high half but does not fit).
     * -(lo >> 63) is evaluated in unsigned arithmetic and yields exactly
     * 0 or UINT64_MAX.
     */
    if (likely(hi == -(lo >> 63))) {
        env->xer &= ~(1 << XER_OV);
    } else {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
    }
    return lo;
}
60
#endif
61

    
62
/* cntlzw: result of clz32() applied to the operand. */
target_ulong helper_cntlzw(target_ulong t)
{
    target_ulong leading_zeros = clz32(t);

    return leading_zeros;
}
66

    
67
#if defined(TARGET_PPC64)
68
/* cntlzd: result of clz64() applied to the operand. */
target_ulong helper_cntlzd(target_ulong t)
{
    target_ulong leading_zeros = clz64(t);

    return leading_zeros;
}
72
#endif
73

    
74
/* shift right arithmetic helper */
75
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
76
                         target_ulong shift)
77
{
78
    int32_t ret;
79

    
80
    if (likely(!(shift & 0x20))) {
81
        if (likely((uint32_t)shift != 0)) {
82
            shift &= 0x1f;
83
            ret = (int32_t)value >> shift;
84
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
85
                env->xer &= ~(1 << XER_CA);
86
            } else {
87
                env->xer |= (1 << XER_CA);
88
            }
89
        } else {
90
            ret = (int32_t)value;
91
            env->xer &= ~(1 << XER_CA);
92
        }
93
    } else {
94
        ret = (int32_t)value >> 31;
95
        if (ret) {
96
            env->xer |= (1 << XER_CA);
97
        } else {
98
            env->xer &= ~(1 << XER_CA);
99
        }
100
    }
101
    return (target_long)ret;
102
}
103

    
104
#if defined(TARGET_PPC64)
105
/*
 * srad: 64-bit arithmetic shift right.
 *
 * @env:   CPU state; XER[CA] is updated as described below.
 * @value: operand.
 * @shift: shift amount.  When bit 0x40 is set the result is the operand's
 *         sign replicated (value >> 63).
 *
 * XER[CA] is set when the result is negative and at least one 1 bit was
 * shifted out, and cleared otherwise.  A zero shift leaves the value
 * unchanged and clears CA.
 *
 * Returns the shifted value.
 */
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;
    int carry;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            /*
             * Mask of the bits shifted out.  The shift count can be as
             * large as 63, so the constant must be 64 bits wide: shifting
             * a plain int by 32 or more is undefined and produced a wrong
             * mask (and therefore a wrong CA) for large shift counts.
             */
            carry = ret < 0 && (value & ((1ULL << shift) - 1)) != 0;
        } else {
            ret = (int64_t)value;
            carry = 0;
        }
    } else {
        /* Shift of 64 or more: only the sign survives. */
        ret = (int64_t)value >> 63;
        carry = ret != 0;
    }

    if (carry) {
        env->xer |= (1 << XER_CA);
    } else {
        env->xer &= ~(1 << XER_CA);
    }
    return ret;
}
133
#endif
134

    
135
#if defined(TARGET_PPC64)
136
/*
 * popcntb (64-bit target): per-byte population count.
 *
 * Three pairwise-sum steps (1-, 2- and 4-bit fields) leave, in every byte
 * of the result, the number of 1 bits of the corresponding input byte.
 */
target_ulong helper_popcntb(target_ulong val)
{
    const uint64_t pairs   = 0x5555555555555555ULL;
    const uint64_t nibbles = 0x3333333333333333ULL;
    const uint64_t bytes   = 0x0f0f0f0f0f0f0f0fULL;

    val = (val & pairs) + ((val >> 1) & pairs);
    val = (val & nibbles) + ((val >> 2) & nibbles);
    val = (val & bytes) + ((val >> 4) & bytes);

    return val;
}
146

    
147
/*
 * popcntw (64-bit target): per-word population count.
 *
 * Five pairwise-sum steps (1-, 2-, 4-, 8- and 16-bit fields) leave, in
 * each 32-bit half of the result, the number of 1 bits of the
 * corresponding input half.
 */
target_ulong helper_popcntw(target_ulong val)
{
    const uint64_t pairs   = 0x5555555555555555ULL;
    const uint64_t nibbles = 0x3333333333333333ULL;
    const uint64_t bytes   = 0x0f0f0f0f0f0f0f0fULL;
    const uint64_t halves  = 0x00ff00ff00ff00ffULL;
    const uint64_t words   = 0x0000ffff0000ffffULL;

    val = (val & pairs) + ((val >> 1) & pairs);
    val = (val & nibbles) + ((val >> 2) & nibbles);
    val = (val & bytes) + ((val >> 4) & bytes);
    val = (val & halves) + ((val >> 8) & halves);
    val = (val & words) + ((val >> 16) & words);

    return val;
}
161

    
162
/* popcntd: result of ctpop64() applied to the operand. */
target_ulong helper_popcntd(target_ulong val)
{
    target_ulong ones = ctpop64(val);

    return ones;
}
166
#else
167
/*
 * popcntb (32-bit target): per-byte population count.
 *
 * Three pairwise-sum steps (1-, 2- and 4-bit fields) leave, in every byte
 * of the result, the number of 1 bits of the corresponding input byte.
 */
target_ulong helper_popcntb(target_ulong val)
{
    const uint32_t pairs   = 0x55555555;
    const uint32_t nibbles = 0x33333333;
    const uint32_t bytes   = 0x0f0f0f0f;

    val = (val & pairs) + ((val >> 1) & pairs);
    val = (val & nibbles) + ((val >> 2) & nibbles);
    val = (val & bytes) + ((val >> 4) & bytes);

    return val;
}
174

    
175
/*
 * popcntw (32-bit target): population count of the word.
 *
 * Five pairwise-sum steps (1-, 2-, 4-, 8- and 16-bit fields) reduce the
 * operand to the number of 1 bits it contains.
 */
target_ulong helper_popcntw(target_ulong val)
{
    const uint32_t pairs   = 0x55555555;
    const uint32_t nibbles = 0x33333333;
    const uint32_t bytes   = 0x0f0f0f0f;
    const uint32_t halves  = 0x00ff00ff;
    const uint32_t words   = 0x0000ffff;

    val = (val & pairs) + ((val >> 1) & pairs);
    val = (val & nibbles) + ((val >> 2) & nibbles);
    val = (val & bytes) + ((val >> 4) & bytes);
    val = (val & halves) + ((val >> 8) & halves);
    val = (val & words) + ((val >> 16) & words);

    return val;
}
184
#endif
185

    
186
/*****************************************************************************/
187
/* PowerPC 601 specific instructions (POWER bridge) */
188
/*
 * POWER "div": divide the 64-bit quantity (arg1 << 32 | MQ) by arg2.
 *
 * On the special cases (divisor zero in its low 32 bits, or low dividend
 * word equal to INT32_MIN with divisor -1) MQ is cleared and INT32_MIN is
 * returned.  Otherwise MQ receives the remainder and the quotient is
 * returned.
 */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t dividend = ((uint64_t)arg1 << 32) | env->spr[SPR_MQ];
    int32_t divisor = (int32_t)arg2;

    if (divisor == 0 ||
        ((int32_t)dividend == INT32_MIN && divisor == (int32_t)-1)) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    }

    env->spr[SPR_MQ] = dividend % arg2;
    return dividend / divisor;
}
201

    
202
/*
 * POWER "divo": same division as helper_div, additionally recording
 * overflow in XER.
 *
 * On the special cases (divisor zero in its low 32 bits, or low dividend
 * word equal to INT32_MIN with divisor -1) XER[OV] and XER[SO] are set,
 * MQ is cleared and INT32_MIN is returned.  Otherwise MQ receives the
 * remainder; OV/SO are set when the quotient differs from its own low
 * 32 bits sign-extended, and OV is cleared when it does not.
 */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t dividend = ((uint64_t)arg1 << 32) | env->spr[SPR_MQ];
    int32_t divisor = (int32_t)arg2;
    uint64_t quotient;

    if (divisor == 0 ||
        ((int32_t)dividend == INT32_MIN && divisor == (int32_t)-1)) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    }

    env->spr[SPR_MQ] = dividend % arg2;
    quotient = dividend / divisor;
    if ((int32_t)quotient != quotient) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
    } else {
        env->xer &= ~(1 << XER_OV);
    }
    return quotient;
}
223

    
224
/*
 * POWER "divs": signed 32-bit divide of arg1 by arg2.
 *
 * For a zero divisor or INT32_MIN / -1, MQ is cleared and INT32_MIN is
 * returned.  Otherwise MQ receives the remainder and the quotient is
 * returned.
 */
target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    int32_t dividend = (int32_t)arg1;
    int32_t divisor = (int32_t)arg2;

    if (divisor == 0 || (dividend == INT32_MIN && divisor == (int32_t)-1)) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    }

    env->spr[SPR_MQ] = dividend % divisor;
    return dividend / divisor;
}
236

    
237
/*
 * POWER "divso": signed 32-bit divide of arg1 by arg2 with overflow
 * recording.
 *
 * For a zero divisor or INT32_MIN / -1, XER[OV] and XER[SO] are set, MQ
 * is cleared and INT32_MIN is returned.  Otherwise XER[OV] is cleared,
 * MQ receives the remainder and the quotient is returned.
 */
target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    int32_t dividend = (int32_t)arg1;
    int32_t divisor = (int32_t)arg2;

    if (divisor == 0 || (dividend == INT32_MIN && divisor == (int32_t)-1)) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    }

    env->xer &= ~(1 << XER_OV);
    env->spr[SPR_MQ] = dividend % divisor;
    return dividend / divisor;
}
251

    
252
/*****************************************************************************/
253
/* 602 specific instructions */
254
/* mfrom is the most crazy instruction ever seen, imho ! */
255
/* Real implementation uses a ROM table. Do the same */
256
/* Extremely decomposed:
257
 *                      -arg / 256
258
 * return 256 * log10(10           + 1.0) + 0.5
259
 */
260
#if !defined(CONFIG_USER_ONLY)
261
/*
 * 602 "mfrom": table lookup.
 *
 * Arguments below 602 index mfrom_ROM_table (brought in from
 * mfrom_table.c); any other argument yields 0.
 */
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    }

    return 0;
}
270
#endif
271

    
272
/*****************************************************************************/
273
/* Altivec extension helpers */
274
/*
 * Host-endianness dependent definitions.
 *
 * HI_IDX / LO_IDX: 0/1 on big-endian hosts, 1/0 on little-endian hosts.
 *
 * VECTOR_FOR_INORDER_I(index, element): loop header walking the indices
 * of r->element upwards on big-endian hosts and downwards on
 * little-endian hosts.  It refers to a variable named "r" at the point of
 * use, and "index" has to be a signed variable for the downward loop to
 * terminate.
 */
#if defined(HOST_WORDS_BIGENDIAN)

#define HI_IDX 0
#define LO_IDX 1
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)

#else

#define HI_IDX 1
#define LO_IDX 0
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)

#endif
289

    
290
/*
 * NaN handling for the vector float helpers.
 *
 * DO_HANDLE_NAN(result, x) expands to "if (x is a NaN) { ... } else" with
 * a deliberately dangling else: when X is a NaN, X with bit 22 set (the
 * QNaN bit) is stored into RESULT; otherwise the statement or block that
 * follows the macro invocation is executed.
 *
 * HANDLE_NAN1/2/3 chain one, two or three such tests, so the first NaN
 * operand (in argument order) determines RESULT.
 */
#define DO_HANDLE_NAN(result, x)                                \
    if (float32_is_any_nan(x)) {                                \
        CPU_FloatU nan_conv_;                                   \
                                                                \
        nan_conv_.f = x;                                        \
        nan_conv_.l |= (1 << 22);  /* Set QNaN bit. */          \
        result = nan_conv_.f;                                   \
    } else

#define HANDLE_NAN1(result, x)                                  \
    DO_HANDLE_NAN(result, x)

#define HANDLE_NAN2(result, x, y)                               \
    DO_HANDLE_NAN(result, x)                                    \
    DO_HANDLE_NAN(result, y)

#define HANDLE_NAN3(result, x, y, z)                            \
    DO_HANDLE_NAN(result, x)                                    \
    DO_HANDLE_NAN(result, y)                                    \
    DO_HANDLE_NAN(result, z)
306

    
307
/*
 * Saturating conversion helpers.
 *
 * SATCVT(from, to, from_type, to_type, min, max) defines
 *     to_type cvt<from><to>(from_type x, int *sat)
 * which clamps x to [min, max].  *sat is set to 1 whenever clamping takes
 * place and is left untouched otherwise, so one flag can accumulate over
 * many conversions.
 *
 * SATCVTU is the variant for unsigned sources: only the upper bound is
 * checked (its "min" argument is accepted for symmetry but unused).
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)

#undef SATCVT
#undef SATCVTU
349

    
350
/*
 * lvsl: fill the sixteen bytes of *r, visited in VECTOR_FOR_INORDER_I
 * order, with consecutive values starting at (sh & 0xf).
 */
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int idx;
    int next = sh & 0xf;

    VECTOR_FOR_INORDER_I(idx, u8) {
        r->u8[idx] = next;
        next++;
    }
}
358

    
359
/*
 * lvsr: fill the sixteen bytes of *r, visited in VECTOR_FOR_INORDER_I
 * order, with consecutive values starting at 0x10 - (sh & 0xf).
 */
void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int idx;
    int next = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(idx, u8) {
        r->u8[idx] = next;
        next++;
    }
}
367

    
368
/*
 * mtvscr: load VSCR from one 32-bit word of *r (word 3 on big-endian
 * hosts, word 0 on little-endian hosts), then pass vscr_nj to
 * set_flush_to_zero() for the vector float status.
 */
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    const int src_word = 3;
#else
    const int src_word = 0;
#endif

    env->vscr = r->u32[src_word];
    set_flush_to_zero(vscr_nj, &env->vec_status);
}
377

    
378
/*
 * vaddcuw: for each 32-bit word store 1 when b exceeds the bitwise
 * complement of a (i.e. when a + b would carry out of 32 bits), else 0.
 */
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int n;

    for (n = 0; n < ARRAY_SIZE(r->u32); n++) {
        /* Largest value that can be added to a without wrapping. */
        uint32_t headroom = ~a->u32[n];

        r->u32[n] = b->u32[n] > headroom;
    }
}
386

    
387
#define VARITH_DO(name, op, element)                                    \
388
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
389
    {                                                                   \
390
        int i;                                                          \
391
                                                                        \
392
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
393
            r->element[i] = a->element[i] op b->element[i];             \
394
        }                                                               \
395
    }
396
#define VARITH(suffix, element)                 \
397
    VARITH_DO(add##suffix, +, element)          \
398
    VARITH_DO(sub##suffix, -, element)
399
VARITH(ubm, u8)
400
VARITH(uhm, u16)
401
VARITH(uwm, u32)
402
#undef VARITH_DO
403
#undef VARITH
404

    
405
#define VARITHFP(suffix, func)                                          \
406
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
407
                          ppc_avr_t *b)                                 \
408
    {                                                                   \
409
        int i;                                                          \
410
                                                                        \
411
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
412
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
413
                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
414
            }                                                           \
415
        }                                                               \
416
    }
417
VARITHFP(addfp, float32_add)
418
VARITHFP(subfp, float32_sub)
419
#undef VARITHFP
420

    
421
#define VARITHSAT_CASE(type, op, cvt, element)                          \
422
    {                                                                   \
423
        type result = (type)a->element[i] op (type)b->element[i];       \
424
        r->element[i] = cvt(result, &sat);                              \
425
    }
426

    
427
#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
428
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
429
                        ppc_avr_t *b)                                   \
430
    {                                                                   \
431
        int sat = 0;                                                    \
432
        int i;                                                          \
433
                                                                        \
434
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
435
            switch (sizeof(r->element[0])) {                            \
436
            case 1:                                                     \
437
                VARITHSAT_CASE(optype, op, cvt, element);               \
438
                break;                                                  \
439
            case 2:                                                     \
440
                VARITHSAT_CASE(optype, op, cvt, element);               \
441
                break;                                                  \
442
            case 4:                                                     \
443
                VARITHSAT_CASE(optype, op, cvt, element);               \
444
                break;                                                  \
445
            }                                                           \
446
        }                                                               \
447
        if (sat) {                                                      \
448
            env->vscr |= (1 << VSCR_SAT);                               \
449
        }                                                               \
450
    }
451
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
452
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
453
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
454
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
455
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
456
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
457
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
458
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
459
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
460
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
461
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
462
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
463
#undef VARITHSAT_CASE
464
#undef VARITHSAT_DO
465
#undef VARITHSAT_SIGNED
466
#undef VARITHSAT_UNSIGNED
467

    
468
#define VAVG_DO(name, element, etype)                                   \
469
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
470
    {                                                                   \
471
        int i;                                                          \
472
                                                                        \
473
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
474
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
475
            r->element[i] = x >> 1;                                     \
476
        }                                                               \
477
    }
478

    
479
#define VAVG(type, signed_element, signed_type, unsigned_element,       \
480
             unsigned_type)                                             \
481
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
482
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
483
VAVG(b, s8, int16_t, u8, uint16_t)
484
VAVG(h, s16, int32_t, u16, uint32_t)
485
VAVG(w, s32, int64_t, u32, uint64_t)
486
#undef VAVG_DO
487
#undef VAVG
488

    
489
#define VCF(suffix, cvt, element)                                       \
490
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
491
                            ppc_avr_t *b, uint32_t uim)                 \
492
    {                                                                   \
493
        int i;                                                          \
494
                                                                        \
495
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
496
            float32 t = cvt(b->element[i], &env->vec_status);           \
497
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
498
        }                                                               \
499
    }
500
VCF(ux, uint32_to_float32, u32)
501
VCF(sx, int32_to_float32, s32)
502
#undef VCF
503

    
504
#define VCMP_DO(suffix, compare, element, record)                       \
505
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
506
                             ppc_avr_t *a, ppc_avr_t *b)                \
507
    {                                                                   \
508
        uint32_t ones = (uint32_t)-1;                                   \
509
        uint32_t all = ones;                                            \
510
        uint32_t none = 0;                                              \
511
        int i;                                                          \
512
                                                                        \
513
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
514
            uint32_t result = (a->element[i] compare b->element[i] ?    \
515
                               ones : 0x0);                             \
516
            switch (sizeof(a->element[0])) {                            \
517
            case 4:                                                     \
518
                r->u32[i] = result;                                     \
519
                break;                                                  \
520
            case 2:                                                     \
521
                r->u16[i] = result;                                     \
522
                break;                                                  \
523
            case 1:                                                     \
524
                r->u8[i] = result;                                      \
525
                break;                                                  \
526
            }                                                           \
527
            all &= result;                                              \
528
            none |= result;                                             \
529
        }                                                               \
530
        if (record) {                                                   \
531
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
532
        }                                                               \
533
    }
534
#define VCMP(suffix, compare, element)          \
535
    VCMP_DO(suffix, compare, element, 0)        \
536
    VCMP_DO(suffix##_dot, compare, element, 1)
537
VCMP(equb, ==, u8)
538
VCMP(equh, ==, u16)
539
VCMP(equw, ==, u32)
540
VCMP(gtub, >, u8)
541
VCMP(gtuh, >, u16)
542
VCMP(gtuw, >, u32)
543
VCMP(gtsb, >, s8)
544
VCMP(gtsh, >, s16)
545
VCMP(gtsw, >, s32)
546
#undef VCMP_DO
547
#undef VCMP
548

    
549
#define VCMPFP_DO(suffix, compare, order, record)                       \
550
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
551
                             ppc_avr_t *a, ppc_avr_t *b)                \
552
    {                                                                   \
553
        uint32_t ones = (uint32_t)-1;                                   \
554
        uint32_t all = ones;                                            \
555
        uint32_t none = 0;                                              \
556
        int i;                                                          \
557
                                                                        \
558
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
559
            uint32_t result;                                            \
560
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
561
                                            &env->vec_status);          \
562
            if (rel == float_relation_unordered) {                      \
563
                result = 0;                                             \
564
            } else if (rel compare order) {                             \
565
                result = ones;                                          \
566
            } else {                                                    \
567
                result = 0;                                             \
568
            }                                                           \
569
            r->u32[i] = result;                                         \
570
            all &= result;                                              \
571
            none |= result;                                             \
572
        }                                                               \
573
        if (record) {                                                   \
574
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
575
        }                                                               \
576
    }
577
#define VCMPFP(suffix, compare, order)          \
578
    VCMPFP_DO(suffix, compare, order, 0)        \
579
    VCMPFP_DO(suffix##_dot, compare, order, 1)
580
VCMPFP(eqfp, ==, float_relation_equal)
581
VCMPFP(gefp, !=, float_relation_less)
582
VCMPFP(gtfp, ==, float_relation_greater)
583
#undef VCMPFP_DO
584
#undef VCMPFP
585

    
586
/*
 * Common body of vcmpbfp / vcmpbfp. (bounds compare).
 *
 * For each float element, bit 31 of the result word is set when a > b and
 * bit 30 when a < -b; both bits are set when a and b are unordered.
 *
 * @env:    CPU state (float status, CR field 6).
 * @r:      destination vector.
 * @a, @b:  source vectors.
 * @record: when non-zero, CR field 6 gets bit 1 set if no ordered element
 *          was out of bounds, and is cleared otherwise.  Unordered
 *          elements do not affect this summary.
 */
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here.  */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            /*
             * Build the flag bits in an unsigned type: shifting an int 1
             * into bit 31 overflows a signed int, which is undefined.
             */
            r->u32[i] = ((uint32_t)!le << 31) | ((uint32_t)!ge << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}
611

    
612
/* vcmpbfp: bounds compare without updating CR field 6. */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    const int record = 0;

    vcmpbfp_internal(env, r, a, b, record);
}
616

    
617
/* vcmpbfp.: bounds compare that also updates CR field 6. */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    const int record = 1;

    vcmpbfp_internal(env, r, a, b, record);
}
622

    
623
#define VCT(suffix, satcvt, element)                                    \
624
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
625
                            ppc_avr_t *b, uint32_t uim)                 \
626
    {                                                                   \
627
        int i;                                                          \
628
        int sat = 0;                                                    \
629
        float_status s = env->vec_status;                               \
630
                                                                        \
631
        set_float_rounding_mode(float_round_to_zero, &s);               \
632
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
633
            if (float32_is_any_nan(b->f[i])) {                          \
634
                r->element[i] = 0;                                      \
635
            } else {                                                    \
636
                float64 t = float32_to_float64(b->f[i], &s);            \
637
                int64_t j;                                              \
638
                                                                        \
639
                t = float64_scalbn(t, uim, &s);                         \
640
                j = float64_to_int64(t, &s);                            \
641
                r->element[i] = satcvt(j, &sat);                        \
642
            }                                                           \
643
        }                                                               \
644
        if (sat) {                                                      \
645
            env->vscr |= (1 << VSCR_SAT);                               \
646
        }                                                               \
647
    }
648
VCT(uxs, cvtsduw, u32)
649
VCT(sxs, cvtsdsw, s32)
650
#undef VCT
651

    
652
/*
 * vmaddfp: element-wise float multiply-add, r = a * c + b.
 *
 * NaN operands are handled by HANDLE_NAN3.  Otherwise the operands are
 * widened to float64, multiplied and added there, and the result is
 * rounded back to float32 once at the end.
 */
void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                    ppc_avr_t *c)
{
    int n;

    for (n = 0; n < ARRAY_SIZE(r->f); n++) {
        HANDLE_NAN3(r->f[n], a->f[n], b->f[n], c->f[n]) {
            /* Compute in double precision so only one rounding occurs. */
            float64 wide_a = float32_to_float64(a->f[n], &env->vec_status);
            float64 wide_b = float32_to_float64(b->f[n], &env->vec_status);
            float64 wide_c = float32_to_float64(c->f[n], &env->vec_status);
            float64 acc;

            acc = float64_mul(wide_a, wide_c, &env->vec_status);
            acc = float64_add(acc, wide_b, &env->vec_status);
            r->f[n] = float64_to_float32(acc, &env->vec_status);
        }
    }
}
672

    
673
/*
 * vmhaddshs: for each signed halfword, r = sat16(c + ((a * b) >> 15)).
 * VSCR[SAT] is set if any element saturated.
 */
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int saturated = 0;
    int n;

    for (n = 0; n < ARRAY_SIZE(r->s16); n++) {
        int32_t product = a->s16[n] * b->s16[n];
        int32_t sum = (int32_t)c->s16[n] + (product >> 15);

        r->s16[n] = cvtswsh(sum, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
690

    
691
/*
 * vmhraddshs: for each signed halfword,
 * r = sat16(c + ((a * b + 0x4000) >> 15)), i.e. the product is rounded
 * before the shift.  VSCR[SAT] is set if any element saturated.
 */
void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int saturated = 0;
    int n;

    for (n = 0; n < ARRAY_SIZE(r->s16); n++) {
        int32_t rounded = a->s16[n] * b->s16[n] + 0x00004000;
        int32_t sum = (int32_t)c->s16[n] + (rounded >> 15);

        r->s16[n] = cvtswsh(sum, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
707

    
708
#define VMINMAX_DO(name, compare, element)                              \
709
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
710
    {                                                                   \
711
        int i;                                                          \
712
                                                                        \
713
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
714
            if (a->element[i] compare b->element[i]) {                  \
715
                r->element[i] = b->element[i];                          \
716
            } else {                                                    \
717
                r->element[i] = a->element[i];                          \
718
            }                                                           \
719
        }                                                               \
720
    }
721
#define VMINMAX(suffix, element)                \
722
    VMINMAX_DO(min##suffix, >, element)         \
723
    VMINMAX_DO(max##suffix, <, element)
724
VMINMAX(sb, s8)
725
VMINMAX(sh, s16)
726
VMINMAX(sw, s32)
727
VMINMAX(ub, u8)
728
VMINMAX(uh, u16)
729
VMINMAX(uw, u32)
730
#undef VMINMAX_DO
731
#undef VMINMAX
732

    
733
#define VMINMAXFP(suffix, rT, rF)                                       \
734
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
735
                          ppc_avr_t *b)                                 \
736
    {                                                                   \
737
        int i;                                                          \
738
                                                                        \
739
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
740
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
741
                if (float32_lt_quiet(a->f[i], b->f[i],                  \
742
                                     &env->vec_status)) {               \
743
                    r->f[i] = rT->f[i];                                 \
744
                } else {                                                \
745
                    r->f[i] = rF->f[i];                                 \
746
                }                                                       \
747
            }                                                           \
748
        }                                                               \
749
    }
750
VMINMAXFP(minfp, a, b)
751
VMINMAXFP(maxfp, b, a)
752
#undef VMINMAXFP
753

    
754
/*
 * vmladduhm: per halfword lane compute a * b + c and keep only the low
 * 16 bits of the result (modulo arithmetic).
 */
void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->s16); lane++) {
        int32_t product = a->s16[lane] * b->s16[lane];
        int32_t total = product + c->s16[lane];

        r->s16[lane] = (int16_t)total;
    }
}
763

    
764
#define VMRG_DO(name, element, highp)                                   \
765
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
766
    {                                                                   \
767
        ppc_avr_t result;                                               \
768
        int i;                                                          \
769
        size_t n_elems = ARRAY_SIZE(r->element);                        \
770
                                                                        \
771
        for (i = 0; i < n_elems / 2; i++) {                             \
772
            if (highp) {                                                \
773
                result.element[i*2+HI_IDX] = a->element[i];             \
774
                result.element[i*2+LO_IDX] = b->element[i];             \
775
            } else {                                                    \
776
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
777
                    b->element[n_elems - i - 1];                        \
778
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
779
                    a->element[n_elems - i - 1];                        \
780
            }                                                           \
781
        }                                                               \
782
        *r = result;                                                    \
783
    }
784
#if defined(HOST_WORDS_BIGENDIAN)
785
#define MRGHI 0
786
#define MRGLO 1
787
#else
788
#define MRGHI 1
789
#define MRGLO 0
790
#endif
791
#define VMRG(suffix, element)                   \
792
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
793
    VMRG_DO(mrgh##suffix, element, MRGLO)
794
VMRG(b, u8)
795
VMRG(h, u16)
796
VMRG(w, u32)
797
#undef VMRG_DO
798
#undef VMRG
799
#undef MRGHI
800
#undef MRGLO
801

    
802
/*
 * vmsummbm: multiply-sum of mixed-sign bytes, modulo.
 *
 * Each signed byte of a is multiplied by the unsigned byte of b at the same
 * index.  The four products belonging to a word are added to the matching
 * word of c, and the low 32 bits of that total are stored in r.
 *
 * All products are formed before r is written, so r may be the same
 * register as a or b.  The accumulation is done on uint32_t so that the
 * wrap-around is well defined instead of relying on signed overflow.
 */
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint32_t sum = c->u32[i];

        sum += (uint32_t)prod[4 * i];
        sum += (uint32_t)prod[4 * i + 1];
        sum += (uint32_t)prod[4 * i + 2];
        sum += (uint32_t)prod[4 * i + 3];
        /* Same storage as r->s32[i]; avoids a signed conversion. */
        r->u32[i] = sum;
    }
}
817

    
818
/*
 * vmsumshm: multiply-sum of signed halfwords, modulo.
 *
 * Each halfword of a is multiplied by the halfword of b at the same index.
 * The two products belonging to a word are added to the matching word of c,
 * and the low 32 bits of that total are stored in r.
 *
 * All products are formed before r is written, so r may be the same
 * register as a or b.  The accumulation is done on uint32_t so that the
 * wrap-around is well defined instead of relying on signed overflow.
 */
void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint32_t sum = c->u32[i];

        sum += (uint32_t)prod[2 * i];
        sum += (uint32_t)prod[2 * i + 1];
        /* Same storage as r->s32[i]; avoids a signed conversion. */
        r->u32[i] = sum;
    }
}
832

    
833
/*
 * vmsumshs: multiply-sum of signed halfwords with saturation.
 *
 * The halfword products are formed first (so r may be the same register as
 * a or b).  Each word of c plus its two products is then summed in 64 bits
 * and narrowed with cvtsdsw().  VSCR[SAT] is set in env when cvtsdsw()
 * raised its flag for any word.
 */
void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t products[8];
    int saturated = 0;
    int k;

    for (k = 0; k < ARRAY_SIZE(r->s16); k++) {
        products[k] = (int32_t)a->s16[k] * b->s16[k];
    }

    VECTOR_FOR_INORDER_I(k, s32) {
        int64_t wide = (int64_t)c->s32[k];

        wide += products[2 * k];
        wide += products[2 * k + 1];
        r->u32[k] = cvtsdsw(wide, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
854

    
855
/*
 * vmsumubm: multiply-sum of unsigned bytes, modulo.
 *
 * Byte products are formed first (so r may be the same register as a or
 * b).  Each word of c plus its four products is then stored in r, modulo
 * 2^32.
 */
void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t products[16];
    int k;

    for (k = 0; k < ARRAY_SIZE(r->u8); k++) {
        products[k] = a->u8[k] * b->u8[k];
    }

    VECTOR_FOR_INORDER_I(k, u32) {
        uint32_t total = c->u32[k];

        total += products[4 * k];
        total += products[4 * k + 1];
        total += products[4 * k + 2];
        total += products[4 * k + 3];
        r->u32[k] = total;
    }
}
870

    
871
/*
 * vmsumuhm: multiply-sum of unsigned halfwords, modulo.
 *
 * Each halfword of a is multiplied by the halfword of b at the same index.
 * The two products belonging to a word are added to the matching word of c,
 * modulo 2^32.  All products are formed before r is written, so r may be
 * the same register as a or b.
 */
void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        /*
         * Widen before multiplying: two uint16_t operands would be promoted
         * to (signed) int, and 0xffff * 0xffff does not fit in a 32-bit int.
         */
        prod[i] = (uint32_t)a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}
885

    
886
/*
 * vmsumuhs: multiply-sum of unsigned halfwords with saturation.
 *
 * The halfword products are formed first (so r may be the same register as
 * a or b).  Each word of c plus its two products is then summed in 64 bits
 * and narrowed with cvtuduw().  VSCR[SAT] is set in env when cvtuduw()
 * raised its flag for any word.
 */
void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        /*
         * Widen before multiplying: two uint16_t operands would be promoted
         * to (signed) int, and 0xffff * 0xffff does not fit in a 32-bit int.
         */
        prod[i] = (uint32_t)a->u16[i] * b->u16[i];
    }

    /* Iterate over the same unsigned word view that is written below. */
    VECTOR_FOR_INORDER_I(i, u32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
907

    
908
#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
909
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
910
    {                                                                   \
911
        int i;                                                          \
912
                                                                        \
913
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
914
            if (evenp) {                                                \
915
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
916
                    b->mul_element[i * 2 + HI_IDX];                     \
917
            } else {                                                    \
918
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
919
                    b->mul_element[i * 2 + LO_IDX];                     \
920
            }                                                           \
921
        }                                                               \
922
    }
923
#define VMUL(suffix, mul_element, prod_element)         \
924
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
925
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
926
VMUL(sb, s8, s16)
927
VMUL(sh, s16, s32)
928
VMUL(ub, u8, u16)
929
VMUL(uh, u16, u32)
930
#undef VMUL_DO
931
#undef VMUL
932

    
933
/*
 * vnmsubfp: per float lane compute -((a * c) - b).
 *
 * The selection is guarded by HANDLE_NAN3(), which is defined outside this
 * chunk (presumably it stores a NaN result itself and skips the guarded
 * statement -- confirm at its definition).
 */
void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    float_status *status = &env->vec_status;
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->f); lane++) {
        HANDLE_NAN3(r->f[lane], a->f[lane], b->f[lane], c->f[lane]) {
            /* Do the computation in higher precision and round only
             * once, at the end.  */
            float64 wide_a = float32_to_float64(a->f[lane], status);
            float64 wide_b = float32_to_float64(b->f[lane], status);
            float64 wide_c = float32_to_float64(c->f[lane], status);
            float64 acc = float64_mul(wide_a, wide_c, status);

            acc = float64_sub(acc, wide_b, status);
            acc = float64_chs(acc);
            r->f[lane] = float64_to_float32(acc, status);
        }
    }
}
954

    
955
/*
 * vperm: byte permute.
 *
 * For every byte of c, the low five bits form a selector: bit 0x10 chooses
 * b (set) or a (clear) as the source, and the low four bits choose the byte
 * within it (mirrored as 15 - n on little-endian hosts).  The result is
 * built in a temporary so r may be the same register as any input.
 */
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t permuted;
    int pos;

    VECTOR_FOR_INORDER_I(pos, u8) {
        const int selector = c->u8[pos] & 0x1f;
        const ppc_avr_t *source = (selector & 0x10) ? b : a;
#if defined(HOST_WORDS_BIGENDIAN)
        const int byte = selector & 0xf;
#else
        const int byte = 15 - (selector & 0xf);
#endif

        permuted.u8[pos] = source->u8[byte];
    }
    *r = permuted;
}
977

    
978
/* PKBIG is 1 on big-endian hosts and 0 otherwise; the VPK helpers use it
 * to decide which source operand fills the first half of the result.  */
#ifdef HOST_WORDS_BIGENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
983
/*
 * vpkpx: pack eight 32-bit pixels (four from each source) into 16-bit
 * pixels.  From every source word the code keeps bits 24..19, 15..11 and
 * 7..3 and places them at bits 15..10, 9..5 and 4..0 of the result.
 *
 * Which operand feeds which half of the result depends on host endianness.
 * The result is built in a temporary so r may be the same register as a
 * or b.
 */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int half, word;
    ppc_avr_t packed;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *sources[2] = { a, b };
#else
    const ppc_avr_t *sources[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(half, u64) {
        VECTOR_FOR_INORDER_I(word, u32) {
            const uint32_t pixel = sources[half]->u32[word];
            const uint32_t top = (pixel >> 9) & 0xfc00;
            const uint32_t middle = (pixel >> 6) & 0x3e0;
            const uint32_t bottom = (pixel >> 3) & 0x1f;

            packed.u16[4 * half + word] = (top | middle | bottom);
        }
    }
    *r = packed;
}
1004

    
1005
#define VPK(suffix, from, to, cvt, dosat)                               \
1006
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1007
                            ppc_avr_t *a, ppc_avr_t *b)                 \
1008
    {                                                                   \
1009
        int i;                                                          \
1010
        int sat = 0;                                                    \
1011
        ppc_avr_t result;                                               \
1012
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1013
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1014
                                                                        \
1015
        VECTOR_FOR_INORDER_I(i, from) {                                 \
1016
            result.to[i] = cvt(a0->from[i], &sat);                      \
1017
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1018
        }                                                               \
1019
        *r = result;                                                    \
1020
        if (dosat && sat) {                                             \
1021
            env->vscr |= (1 << VSCR_SAT);                               \
1022
        }                                                               \
1023
    }
1024
#define I(x, y) (x)
1025
VPK(shss, s16, s8, cvtshsb, 1)
1026
VPK(shus, s16, u8, cvtshub, 1)
1027
VPK(swss, s32, s16, cvtswsh, 1)
1028
VPK(swus, s32, u16, cvtswuh, 1)
1029
VPK(uhus, u16, u8, cvtuhub, 1)
1030
VPK(uwus, u32, u16, cvtuwuh, 1)
1031
VPK(uhum, u16, u8, I, 0)
1032
VPK(uwum, u32, u16, I, 0)
1033
#undef I
1034
#undef VPK
1035
#undef PKBIG
1036

    
1037
/*
 * vrefp: per float lane store 1.0 / b, computed with float32_div() under
 * env->vec_status.  Guarded by HANDLE_NAN1(), defined outside this chunk.
 */
void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    float_status *status = &env->vec_status;
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->f); lane++) {
        HANDLE_NAN1(r->f[lane], b->f[lane]) {
            r->f[lane] = float32_div(float32_one, b->f[lane], status);
        }
    }
}
1047

    
1048
#define VRFI(suffix, rounding)                                  \
1049
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1050
                             ppc_avr_t *b)                      \
1051
    {                                                           \
1052
        int i;                                                  \
1053
        float_status s = env->vec_status;                       \
1054
                                                                \
1055
        set_float_rounding_mode(rounding, &s);                  \
1056
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1057
            HANDLE_NAN1(r->f[i], b->f[i]) {                     \
1058
                r->f[i] = float32_round_to_int (b->f[i], &s);   \
1059
            }                                                   \
1060
        }                                                       \
1061
    }
1062
VRFI(n, float_round_nearest_even)
1063
VRFI(m, float_round_down)
1064
VRFI(p, float_round_up)
1065
VRFI(z, float_round_to_zero)
1066
#undef VRFI
1067

    
1068
#define VROTATE(suffix, element)                                        \
1069
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1070
    {                                                                   \
1071
        int i;                                                          \
1072
                                                                        \
1073
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1074
            unsigned int mask = ((1 <<                                  \
1075
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
1076
                                 - 1);                                  \
1077
            unsigned int shift = b->element[i] & mask;                  \
1078
            r->element[i] = (a->element[i] << shift) |                  \
1079
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1080
        }                                                               \
1081
    }
1082
VROTATE(b, u8)
1083
VROTATE(h, u16)
1084
VROTATE(w, u32)
1085
#undef VROTATE
1086

    
1087
/*
 * vrsqrtefp: per float lane store 1.0 / sqrt(b), using float32_sqrt()
 * followed by float32_div() under env->vec_status.  Guarded by
 * HANDLE_NAN1(), defined outside this chunk.
 */
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    float_status *status = &env->vec_status;
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->f); lane++) {
        HANDLE_NAN1(r->f[lane], b->f[lane]) {
            float32 root = float32_sqrt(b->f[lane], status);

            r->f[lane] = float32_div(float32_one, root, status);
        }
    }
}
1099

    
1100
/*
 * vsel: bitwise select.  Every result bit is taken from b where the
 * matching bit of c is 1 and from a where it is 0.  The vector is processed
 * as its two 64-bit halves.
 */
void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    int half;

    for (half = 0; half < 2; half++) {
        const uint64_t mask = c->u64[half];

        r->u64[half] = (a->u64[half] & ~mask) | (b->u64[half] & mask);
    }
}
1106

    
1107
/*
 * vexptefp: per float lane store float32_exp2(b) computed under
 * env->vec_status.  Guarded by HANDLE_NAN1(), defined outside this chunk.
 */
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    float_status *status = &env->vec_status;
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->f); lane++) {
        HANDLE_NAN1(r->f[lane], b->f[lane]) {
            r->f[lane] = float32_exp2(b->f[lane], status);
        }
    }
}
1117

    
1118
/*
 * vlogefp: per float lane store float32_log2(b) computed under
 * env->vec_status.  Guarded by HANDLE_NAN1(), defined outside this chunk.
 */
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    float_status *status = &env->vec_status;
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->f); lane++) {
        HANDLE_NAN1(r->f[lane], b->f[lane]) {
            r->f[lane] = float32_log2(b->f[lane], status);
        }
    }
}
1128

    
1129
#if defined(HOST_WORDS_BIGENDIAN)
1130
#define LEFT 0
1131
#define RIGHT 1
1132
#else
1133
#define LEFT 1
1134
#define RIGHT 0
1135
#endif
1136
/* The specification says that the results are undefined if all of the
1137
 * shift counts are not identical.  We check to make sure that they are
1138
 * to conform to what real hardware appears to do.  */
1139
#define VSHIFT(suffix, leftp)                                           \
1140
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1141
    {                                                                   \
1142
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
1143
        int doit = 1;                                                   \
1144
        int i;                                                          \
1145
                                                                        \
1146
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1147
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1148
        }                                                               \
1149
        if (doit) {                                                     \
1150
            if (shift == 0) {                                           \
1151
                *r = *a;                                                \
1152
            } else if (leftp) {                                         \
1153
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1154
                                                                        \
1155
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1156
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1157
            } else {                                                    \
1158
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1159
                                                                        \
1160
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1161
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1162
            }                                                           \
1163
        }                                                               \
1164
    }
1165
VSHIFT(l, LEFT)
1166
VSHIFT(r, RIGHT)
1167
#undef VSHIFT
1168
#undef LEFT
1169
#undef RIGHT
1170

    
1171
#define VSL(suffix, element)                                            \
1172
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1173
    {                                                                   \
1174
        int i;                                                          \
1175
                                                                        \
1176
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1177
            unsigned int mask = ((1 <<                                  \
1178
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
1179
                                 - 1);                                  \
1180
            unsigned int shift = b->element[i] & mask;                  \
1181
                                                                        \
1182
            r->element[i] = a->element[i] << shift;                     \
1183
        }                                                               \
1184
    }
1185
VSL(b, u8)
1186
VSL(h, u16)
1187
VSL(w, u32)
1188
#undef VSL
1189

    
1190
/*
 * vsldoi: treat a followed by b as one 32-byte value, shift it left by
 * (shift & 0xf) bytes and keep the upper 16 bytes.  The host-endian
 * variants differ only in how the byte index is formed and in which source
 * is used once the index leaves the first 16 bytes.  The result is built in
 * a temporary so r may be the same register as a or b.
 */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    const int bytes = shift & 0xf;
    ppc_avr_t shifted;
    int pos;

    for (pos = 0; pos < ARRAY_SIZE(r->u8); pos++) {
#if defined(HOST_WORDS_BIGENDIAN)
        const int index = bytes + pos;
        const ppc_avr_t *source = (index > 0xf) ? b : a;
#else
        const int index = (16 - bytes) + pos;
        const ppc_avr_t *source = (index > 0xf) ? a : b;
#endif

        /* index is at most 31, so "& 0xf" subtracts 16 when needed. */
        shifted.u8[pos] = source->u8[index & 0xf];
    }
    *r = shifted;
}
1217

    
1218
/*
 * vslo: shift a left by whole bytes.  The byte count is bits 6..3 of the
 * byte of b at index LO_IDX * 15; vacated bytes are zero-filled.
 * memmove() is used because r may be the same register as a.
 */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    const int bytes = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
    const int kept = 16 - bytes;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[bytes], kept);
    memset(&r->u8[kept], 0, bytes);
#else
    memmove(&r->u8[bytes], &a->u8[0], kept);
    memset(&r->u8[0], 0, bytes);
#endif
}
1230

    
1231
/* Experimental testing shows that hardware masks the immediate.  */
1232
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1233
#if defined(HOST_WORDS_BIGENDIAN)
1234
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1235
#else
1236
#define SPLAT_ELEMENT(element)                                  \
1237
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1238
#endif
1239
#define VSPLT(suffix, element)                                          \
1240
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1241
    {                                                                   \
1242
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1243
        int i;                                                          \
1244
                                                                        \
1245
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1246
            r->element[i] = s;                                          \
1247
        }                                                               \
1248
    }
1249
VSPLT(b, u8)
1250
VSPLT(h, u16)
1251
VSPLT(w, u32)
1252
#undef VSPLT
1253
#undef SPLAT_ELEMENT
1254
#undef _SPLAT_MASKED
1255

    
1256
#define VSPLTI(suffix, element, splat_type)                     \
1257
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
1258
    {                                                           \
1259
        splat_type x = (int8_t)(splat << 3) >> 3;               \
1260
        int i;                                                  \
1261
                                                                \
1262
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
1263
            r->element[i] = x;                                  \
1264
        }                                                       \
1265
    }
1266
VSPLTI(b, s8, int8_t)
1267
VSPLTI(h, s16, int16_t)
1268
VSPLTI(w, s32, int32_t)
1269
#undef VSPLTI
1270

    
1271
#define VSR(suffix, element)                                            \
1272
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1273
    {                                                                   \
1274
        int i;                                                          \
1275
                                                                        \
1276
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1277
            unsigned int mask = ((1 <<                                  \
1278
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
1279
                                 - 1);                                  \
1280
            unsigned int shift = b->element[i] & mask;                  \
1281
                                                                        \
1282
            r->element[i] = a->element[i] >> shift;                     \
1283
        }                                                               \
1284
    }
1285
VSR(ab, s8)
1286
VSR(ah, s16)
1287
VSR(aw, s32)
1288
VSR(b, u8)
1289
VSR(h, u16)
1290
VSR(w, u32)
1291
#undef VSR
1292

    
1293
/*
 * vsro: shift a right by whole bytes.  The byte count is bits 6..3 of the
 * byte of b at index LO_IDX * 15; vacated bytes are zero-filled.
 * memmove() is used because r may be the same register as a.
 */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    const int bytes = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
    const int kept = 16 - bytes;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[bytes], &a->u8[0], kept);
    memset(&r->u8[0], 0, bytes);
#else
    memmove(&r->u8[0], &a->u8[bytes], kept);
    memset(&r->u8[kept], 0, bytes);
#endif
}
1305

    
1306
/*
 * vsubcuw: per unsigned word store 1 when a >= b and 0 otherwise.
 */
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int lane;

    for (lane = 0; lane < ARRAY_SIZE(r->u32); lane++) {
        r->u32[lane] = (a->u32[lane] >= b->u32[lane]) ? 1 : 0;
    }
}
1314

    
1315
/*
 * vsumsws: add all signed words of a to one word of b in 64 bits, narrow
 * the total with cvtsdsw() and store it in the same word position of r.
 * Every other word of r becomes zero.  The word position is the last array
 * element on big-endian hosts and the first one otherwise.
 *
 * VSCR[SAT] is set in env when cvtsdsw() raised its flag.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#if defined(HOST_WORDS_BIGENDIAN)
    const int slot = ARRAY_SIZE(r->s32) - 1;
#else
    const int slot = 0;
#endif
    int64_t total = (int64_t)b->s32[slot];
    ppc_avr_t summed;
    int saturated = 0;
    int k;

    for (k = 0; k < ARRAY_SIZE(r->s32); k++) {
        summed.s32[k] = 0;
        total += a->s32[k];
    }
    summed.s32[slot] = cvtsdsw(total, &saturated);
    *r = summed;

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1339

    
1340
/*
 * vsum2sws: for each 64-bit half, add the half's signed words of a to one
 * word of b in 64 bits, narrow the total with cvtsdsw() and store it in the
 * same word position of r.  The other word of the half becomes zero.
 *
 * VSCR[SAT] is set in env when cvtsdsw() raised its flag for either half.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#if defined(HOST_WORDS_BIGENDIAN)
    const int slot = 1;
#else
    const int slot = 0;
#endif
    ppc_avr_t summed;
    int saturated = 0;
    int half, word;

    for (half = 0; half < ARRAY_SIZE(r->u64); half++) {
        const int dest = slot + half * 2;
        int64_t total = (int64_t)b->s32[dest];

        summed.u64[half] = 0;
        for (word = 0; word < ARRAY_SIZE(r->u64); word++) {
            total += a->s32[2 * half + word];
        }
        summed.s32[dest] = cvtsdsw(total, &saturated);
    }

    *r = summed;
    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1366

    
1367
/*
 * vsum4sbs: per word, add the four signed bytes of a that share the word
 * to the signed word of b in 64 bits, then narrow with cvtsdsw().
 *
 * VSCR[SAT] is set in env when cvtsdsw() raised its flag for any word.
 */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int saturated = 0;
    int word, byte;

    for (word = 0; word < ARRAY_SIZE(r->s32); word++) {
        int64_t total = (int64_t)b->s32[word];

        for (byte = 0; byte < ARRAY_SIZE(r->s32); byte++) {
            total += a->s8[4 * word + byte];
        }
        r->s32[word] = cvtsdsw(total, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1385

    
1386
/*
 * vsum4shs: per word, add the two signed halfwords of a that share the
 * word to the signed word of b in 64 bits, then narrow with cvtsdsw().
 *
 * VSCR[SAT] is set in env when cvtsdsw() raised its flag for any word.
 */
void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int saturated = 0;
    int word;

    for (word = 0; word < ARRAY_SIZE(r->s32); word++) {
        const int pair = a->s16[2 * word] + a->s16[2 * word + 1];
        int64_t total = (int64_t)b->s32[word];

        total += pair;
        r->s32[word] = cvtsdsw(total, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1402

    
1403
/*
 * vsum4ubs: per word, add the four unsigned bytes of a that share the word
 * to the unsigned word of b in 64 bits, then narrow with cvtuduw().
 *
 * VSCR[SAT] is set in env when cvtuduw() raised its flag for any word.
 */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int saturated = 0;
    int word, byte;

    for (word = 0; word < ARRAY_SIZE(r->u32); word++) {
        uint64_t total = (uint64_t)b->u32[word];

        for (byte = 0; byte < ARRAY_SIZE(r->u32); byte++) {
            total += a->u8[4 * word + byte];
        }
        r->u32[word] = cvtuduw(total, &saturated);
    }

    if (saturated) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1421

    
1422
#if defined(HOST_WORDS_BIGENDIAN)
1423
#define UPKHI 1
1424
#define UPKLO 0
1425
#else
1426
#define UPKHI 0
1427
#define UPKLO 1
1428
#endif
1429
#define VUPKPX(suffix, hi)                                              \
1430
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1431
    {                                                                   \
1432
        int i;                                                          \
1433
        ppc_avr_t result;                                               \
1434
                                                                        \
1435
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
1436
            uint16_t e = b->u16[hi ? i : i+4];                          \
1437
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
1438
            uint8_t r = (e >> 10) & 0x1f;                               \
1439
            uint8_t g = (e >> 5) & 0x1f;                                \
1440
            uint8_t b = e & 0x1f;                                       \
1441
                                                                        \
1442
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
1443
        }                                                               \
1444
        *r = result;                                                    \
1445
    }
1446
VUPKPX(lpx, UPKLO)
1447
VUPKPX(hpx, UPKHI)
1448
#undef VUPKPX
1449

    
1450
#define VUPK(suffix, unpacked, packee, hi)                              \
1451
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1452
    {                                                                   \
1453
        int i;                                                          \
1454
        ppc_avr_t result;                                               \
1455
                                                                        \
1456
        if (hi) {                                                       \
1457
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
1458
                result.unpacked[i] = b->packee[i];                      \
1459
            }                                                           \
1460
        } else {                                                        \
1461
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1462
                 i++) {                                                 \
1463
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1464
            }                                                           \
1465
        }                                                               \
1466
        *r = result;                                                    \
1467
    }
1468
VUPK(hsb, s16, s8, UPKHI)
1469
VUPK(hsh, s32, s16, UPKHI)
1470
VUPK(lsb, s16, s8, UPKLO)
1471
VUPK(lsh, s32, s16, UPKLO)
1472
#undef VUPK
1473
#undef UPKHI
1474
#undef UPKLO
1475

    
1476
/* Drop the file-local macros used by the vector helpers above. */
#undef HI_IDX
#undef LO_IDX
#undef VECTOR_FOR_INORDER_I
#undef HANDLE_NAN1
#undef HANDLE_NAN2
#undef HANDLE_NAN3
#undef DO_HANDLE_NAN
1483

    
1484
/*****************************************************************************/
1485
/* SPE extension helpers */
1486
/* Use a table to make this quicker: hbrev[n] is the 4-bit value n with its
 * bit order reversed.  */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte: swap the nibbles and reverse each one
 * through hbrev[].  */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the bytes swap places and each
 * one is bit-reversed.
 *
 * Every reversed byte is widened to uint32_t before shifting; shifting the
 * promoted int left by 24 would move a set bit 7 into the sign bit.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
        ((uint32_t)byte_reverse(val >> 16) << 8) |
        ((uint32_t)byte_reverse(val >> 8) << 16) |
        ((uint32_t)byte_reverse(val) << 24);
}
1502

    
1503
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
1504
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
1505
{
1506
    uint32_t a, b, d, mask;
1507

    
1508
    mask = UINT32_MAX >> (32 - MASKBITS);
1509
    a = arg1 & mask;
1510
    b = arg2 & mask;
1511
    d = word_reverse(1 + word_reverse(a | ~b));
1512
    return (arg1 & ~mask) | (d & b);
1513
}
1514

    
1515
/*
 * Count leading sign bits: when bit 31 of val is set, return clz32() of the
 * complement, otherwise clz32() of val itself.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    return clz32((val & 0x80000000) ? ~val : val);
}
1523

    
1524
/* Count leading zero bits of a 32-bit value; thin wrapper around clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    uint32_t zeros = clz32(val);

    return zeros;
}
1528

    
1529
/* 440 specific */
1530
/*
 * dlmzb: determine leftmost zero byte.
 *
 * Scans the four bytes of 'high' and then the four bytes of 'low', most
 * significant byte first, for a zero byte.
 *
 * Returns the 1-based position (1..8) of the first zero byte, or 8 when
 * none of the eight bytes is zero.  The same count is written to the low
 * seven bits of env->xer.
 *
 * When update_Rc is non-zero, env->crf[0] is set to 0x4 (zero byte found in
 * 'high'), 0x8 (found in 'low') or 0x2 (not found), and then OR-ed with
 * xer_so (a macro defined outside this chunk).
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    /* No zero byte: the loops leave i at 9, but the count is defined as
     * the number of bytes examined, i.e. 8.  */
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}