Statistics
| Branch: | Revision:

root / target-sparc / vis_helper.c @ 0834c9ea

History | View | Annotate | Download (14.8 kB)

1
/*
2
 * VIS op helpers
3
 *
4
 *  Copyright (c) 2003-2005 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19

    
20
#include "cpu.h"
21
#include "helper.h"
22

    
23
/*
 * Extract the inclusive bit field FROM..TO of the 64-bit value X using
 * non-native bit numbering: bit 0 is the most significant bit and bit 63
 * the least significant one.  The field width (TO - FROM + 1) has to stay
 * below 64 because the mask is produced by shifting 1ULL.
 */
#define GET_FIELD(X, FROM, TO)                                          \
    (((X) >> (63 - (TO))) & ((1ULL << (((TO) - (FROM)) + 1)) - 1))

/*
 * Same extraction, but with the bit numbering used in the manuals, i.e.
 * bit 0 is 2^0.  Both bounds are mirrored and handed to GET_FIELD.
 */
#define GET_FIELD_SP(X, FROM, TO)                                       \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
30

    
31
/*
 * Rearrange bit fields of pixel_addr into an array offset.
 *
 * Using manual bit numbering (bit 0 is 2^0), the result is assembled as:
 *   result bits  1..0   <- pixel_addr bits 12..11
 *   result bits  3..2   <- pixel_addr bits 34..33
 *   result bit   4      <- pixel_addr bit  55
 *   result bits  8..5   <- pixel_addr bits 16..13
 *   result bits 12..9   <- pixel_addr bits 38..35
 *   result bits 16..13  <- pixel_addr bits 59..56
 *   from bit 17         <- cubesize bits starting at pixel_addr bit 17
 *   from 17 + cubesize  <- cubesize bits starting at pixel_addr bit 39
 *   from 17 + 2*cubesize<- pixel_addr bits 63..60
 *
 * cubesize is the number of extra bits taken from each of the two
 * variable-width fields; with cubesize == 0 those fields are empty.
 * NOTE(review): the range of cubesize is not checked here -- presumably
 * the caller passes a small value (the shifts assume so); confirm.
 */
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    /*
     * GET_FIELD_SP takes (low bit, high bit).  The field that starts at
     * bit 17 must therefore be given as 17 .. 17 + cubesize - 1, exactly
     * like the field that starts at bit 39; passing the bounds the other
     * way round yields a field width of 2 - cubesize.
     */
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17, 17 + cubesize - 1) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}
43

    
44
/*
 * Lane accessors for the members of the VIS64 / VIS32 unions.  Lane 0 is
 * always the least significant element, whatever the host byte order is.
 */
#ifndef HOST_WORDS_BIGENDIAN
/* Little-endian host: array index and lane number coincide. */
#define VIS_B64(n) b[(n)]
#define VIS_W64(n) w[(n)]
#define VIS_SW64(n) sw[(n)]
#define VIS_L64(n) l[(n)]
#define VIS_B32(n) b[(n)]
#define VIS_W32(n) w[(n)]
#else
/* Big-endian host: element 0 is the most significant, so mirror the index. */
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#endif
59

    
60
typedef union {
61
    uint8_t b[8];
62
    uint16_t w[4];
63
    int16_t sw[4];
64
    uint32_t l[2];
65
    uint64_t ll;
66
    float64 d;
67
} VIS64;
68

    
69
typedef union {
70
    uint8_t b[4];
71
    uint16_t w[2];
72
    uint32_t l;
73
    float32 f;
74
} VIS32;
75

    
76
/*
 * Interleave the four low-order bytes of src1 and src2.
 *
 * With byte 0 being the least significant byte, result byte 2k is byte k
 * of src2 and result byte 2k + 1 is byte k of src1 (k = 0..3).  The upper
 * 32 bits of both inputs do not contribute to the result.
 */
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    uint64_t merged = 0;
    int k;

    for (k = 0; k < 4; k++) {
        const uint64_t from_src1 = (src1 >> (8 * k)) & 0xff;
        const uint64_t from_src2 = (src2 >> (8 * k)) & 0xff;

        merged |= from_src2 << (16 * k);
        merged |= from_src1 << (16 * k + 8);
    }

    return merged;
}
95

    
96
/*
 * Lane-wise multiply of four unsigned bytes by four signed halfwords.
 *
 * For lane r (0 = least significant), byte r of src1 (bits 8r+7..8r) is
 * multiplied by the signed 16-bit lane r of src2.  The product is rounded
 * to nearest at bit 8 (a low byte above 0x7f rounds up) and bits 23..8 of
 * it become 16-bit lane r of the result.
 */
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int32_t coeff = (int16_t)(src2 >> (16 * lane));
        const int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        const uint32_t product = coeff * pixel;
        /* Adding half an output unit before the shift is the rounding. */
        const uint16_t rounded = (uint16_t)((product + 0x80) >> 8);

        result |= (uint64_t)rounded << (16 * lane);
    }

    return result;
}
119

    
120
/*
 * Multiply four unsigned bytes by one shared signed 16-bit multiplier.
 *
 * The multiplier is the signed halfword in bits 31..16 of src2.  For lane
 * r (0 = least significant), byte r of src1 is multiplied by it, the
 * product is rounded to nearest at bit 8 (a low byte above 0x7f rounds
 * up) and bits 23..8 become 16-bit lane r of the result.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    /*
     * Latch the multiplier before producing any output: result lane 1
     * occupies the very same bits, so re-reading it per lane would feed
     * an already computed product into lanes 2 and 3.
     */
    const int32_t multiplier = (int16_t)(src2 >> 16);
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        uint32_t tmp = multiplier * pixel;

        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        result |= (uint64_t)(uint16_t)(tmp >> 8) << (16 * lane);
    }

    return result;
}
143

    
144
/*
 * Multiply four unsigned bytes by one shared signed 16-bit multiplier.
 *
 * The multiplier is the signed halfword in bits 15..0 of src2.  For lane
 * r (0 = least significant), byte r of src1 is multiplied by it, the
 * product is rounded to nearest at bit 8 (a low byte above 0x7f rounds
 * up) and bits 23..8 become 16-bit lane r of the result.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    /*
     * Latch the multiplier before producing any output: result lane 0
     * occupies the very same bits, so re-reading it per lane would feed
     * an already computed product into lanes 1 to 3.
     */
    const int32_t multiplier = (int16_t)src2;
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        uint32_t tmp = multiplier * pixel;

        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        result |= (uint64_t)(uint16_t)(tmp >> 8) << (16 * lane);
    }

    return result;
}
167

    
168
/*
 * Lane-wise multiply using the signed upper byte of each src1 halfword.
 *
 * For each 16-bit lane r (0 = least significant), the signed lane of src1
 * is arithmetically shifted right by 8 (keeping its signed upper byte)
 * and multiplied by the signed lane r of src2.  The product is rounded to
 * nearest at bit 8 (a low byte above 0x7f rounds up) and bits 23..8
 * become lane r of the result.
 */
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int pos = 16 * lane;
        const int32_t upper = (int32_t)(int16_t)(src1 >> pos) >> 8;
        const int32_t coeff = (int16_t)(src2 >> pos);
        const uint32_t product = coeff * upper;
        /* Adding half an output unit before the shift is the rounding. */
        const uint16_t rounded = (uint16_t)((product + 0x80) >> 8);

        result |= (uint64_t)rounded << pos;
    }

    return result;
}
191

    
192
/*
 * Lane-wise multiply using the unsigned lower byte of each src1 halfword.
 *
 * For each 16-bit lane r (0 = least significant), the unsigned low byte
 * of lane r of src1 is multiplied by the signed lane r of src2 (the
 * multiplication itself is carried out modulo 2^32).  The product is
 * rounded to nearest at bit 8 (a low byte above 0x7f rounds up) and bits
 * 23..8 become lane r of the result.
 */
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int pos = 16 * lane;
        const uint32_t lower = (uint8_t)(src1 >> pos);
        const int32_t coeff = (int16_t)(src2 >> pos);
        const uint32_t product = coeff * lower;
        /* Adding half an output unit before the shift is the rounding. */
        const uint16_t rounded = (uint16_t)((product + 0x80) >> 8);

        result |= (uint64_t)rounded << pos;
    }

    return result;
}
215

    
216
/*
 * Widening multiply of the two low halfword lanes, using the signed upper
 * byte of each src1 lane.
 *
 * For r = 0, 1 the signed 16-bit lane r of src1 is arithmetically shifted
 * right by 8 and multiplied by the signed 16-bit lane r of src2.  If the
 * low byte of the product exceeds 0x7f, 0x100 is added.  The resulting
 * 32-bit value is stored unshifted in 32-bit lane r of the result.  Bits
 * 63..32 of both inputs are not used.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int pair;

    for (pair = 0; pair < 2; pair++) {
        const int32_t upper = (int32_t)(int16_t)(src1 >> (16 * pair)) >> 8;
        const int32_t coeff = (int16_t)(src2 >> (16 * pair));
        uint32_t product = coeff * upper;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }
        result |= (uint64_t)product << (32 * pair);
    }

    return result;
}
238

    
239
/*
 * Widening multiply of the two low halfword lanes, using the unsigned
 * lower byte of each src1 lane.
 *
 * For r = 0, 1 the unsigned low byte of 16-bit lane r of src1 is
 * multiplied by the signed 16-bit lane r of src2 (modulo 2^32).  If the
 * low byte of the product exceeds 0x7f, 0x100 is added.  The resulting
 * 32-bit value is stored unshifted in 32-bit lane r of the result.  Bits
 * 63..32 of both inputs are not used.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int pair;

    for (pair = 0; pair < 2; pair++) {
        const uint32_t lower = (uint8_t)(src1 >> (16 * pair));
        const int32_t coeff = (int16_t)(src2 >> (16 * pair));
        uint32_t product = coeff * lower;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }
        result |= (uint64_t)product << (32 * pair);
    }

    return result;
}
261

    
262
/*
 * Expand the four bytes in the low 32 bits of src1 into four halfwords.
 *
 * Byte r of src1 (0 = least significant) is shifted left by 4 and stored
 * in 16-bit lane r of the result.  Every result lane is overwritten, so
 * the value of src2 has no influence on the returned value.
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    const uint32_t packed = (uint32_t)src1;
    uint64_t expanded = 0;
    int lane;

    (void)src2;

    for (lane = 0; lane < 4; lane++) {
        const uint64_t pixel = (packed >> (8 * lane)) & 0xff;

        expanded |= (pixel << 4) << (16 * lane);
    }

    return expanded;
}
276

    
277
/*
 * Generate the four partitioned-arithmetic helpers for one operation F:
 *
 *   name##16   four 16-bit lanes in 64-bit operands
 *   name##16s  two 16-bit lanes in 32-bit operands
 *   name##32   two 32-bit lanes in 64-bit operands
 *   name##32s  a single 32-bit lane
 *
 * In every lane the result is F(lane of src2, lane of src1), truncated to
 * the lane width; lanes never carry into each other.
 */
#define VIS_HELPER(name, F)                                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t out = 0;                                               \
        int pos;                                                        \
                                                                        \
        for (pos = 0; pos < 64; pos += 16) {                            \
            const uint16_t lhs = (uint16_t)(src2 >> pos);               \
            const uint16_t rhs = (uint16_t)(src1 >> pos);               \
                                                                        \
            out |= (uint64_t)(uint16_t)(F(lhs, rhs)) << pos;            \
        }                                                               \
                                                                        \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        uint32_t out = 0;                                               \
        int pos;                                                        \
                                                                        \
        for (pos = 0; pos < 32; pos += 16) {                            \
            const uint16_t lhs = (uint16_t)(src2 >> pos);               \
            const uint16_t rhs = (uint16_t)(src1 >> pos);               \
                                                                        \
            out |= (uint32_t)(uint16_t)(F(lhs, rhs)) << pos;            \
        }                                                               \
                                                                        \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t out = 0;                                               \
        int pos;                                                        \
                                                                        \
        for (pos = 0; pos < 64; pos += 32) {                            \
            const uint32_t lhs = (uint32_t)(src2 >> pos);               \
            const uint32_t rhs = (uint32_t)(src1 >> pos);               \
                                                                        \
            out |= (uint64_t)(uint32_t)(F(lhs, rhs)) << pos;            \
        }                                                               \
                                                                        \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        return F(src2, src1);                                           \
    }

/* Lane operations; the first argument is the src2 lane, the second src1. */
#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))

/* helper_fpadd16 / 16s / 32 / 32s and helper_fpsub16 / 16s / 32 / 32s */
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)
335

    
336
/*
 * Generate the partitioned compare helpers name##16 (four 16-bit lanes)
 * and name##32 (two 32-bit lanes) for one predicate F.
 *
 * Lane r (0 = least significant) of src1 is compared with lane r of src2
 * as F(src1 lane, src2 lane); bit r of the result is set when the
 * predicate holds.  All other result bits are zero.
 *
 * The lanes are compared as signed two's-complement values: the VIS
 * partitioned compare instructions operate on signed fixed-point data, so
 * an unsigned comparison would give wrong answers for "greater than" and
 * "less or equal" whenever exactly one operand is negative.
 */
#define VIS_CMPHELPER(name, F)                                          \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t flags = 0;                                             \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            const int16_t a = (int16_t)(src1 >> (16 * lane));           \
            const int16_t b = (int16_t)(src2 >> (16 * lane));           \
                                                                        \
            if (F(a, b)) {                                              \
                flags |= 1ULL << lane;                                  \
            }                                                           \
        }                                                               \
                                                                        \
        return flags;                                                   \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t flags = 0;                                             \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            const int32_t a = (int32_t)(src1 >> (32 * lane));           \
            const int32_t b = (int32_t)(src2 >> (32 * lane));           \
                                                                        \
            if (F(a, b)) {                                              \
                flags |= 1ULL << lane;                                  \
            }                                                           \
        }                                                               \
                                                                        \
        return flags;                                                   \
    }

/* Lane predicates; the first argument is the src1 lane, the second src2. */
#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

/* helper_fcmp{gt,eq,le,ne}16 and helper_fcmp{gt,eq,le,ne}32 */
VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
376

    
377
/*
 * Add the sum of absolute differences of the eight byte pairs of src1 and
 * src2 to sum and return the new total.
 *
 * The bytes are visited from the most significant to the least
 * significant one; each is treated as an unsigned value 0..255.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    for (shift = 56; shift >= 0; shift -= 8) {
        const int a = (src1 >> shift) & 0xff;
        const int b = (src2 >> shift) & 0xff;

        /* |a - b| without ever forming a negative intermediate. */
        sum += (a >= b) ? (a - b) : (b - a);
    }

    return sum;
}
397

    
398
/*
 * Pack four signed 16-bit fixed-point lanes of rs2 into four unsigned
 * bytes.
 *
 * The scale factor is taken from bits 6..3 of gsr.  Each lane r
 * (0 = least significant) is multiplied by 2^scale, shifted right
 * arithmetically by 7 and clamped to 0..255; the resulting byte is placed
 * in byte r of the 32-bit return value.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        const int16_t src = (int16_t)(rs2 >> (byte * 16));
        /*
         * Scale by multiplication: left-shifting a negative value is
         * undefined behaviour in C, whereas this product (at most 2^30 in
         * magnitude) is always representable.
         */
        const int32_t scaled = (int32_t)src * ((int32_t)1 << scale);
        const int32_t from_fixed = scaled >> 7;
        uint32_t val;

        if (from_fixed < 0) {
            val = 0;
        } else if (from_fixed > 255) {
            val = 255;
        } else {
            val = from_fixed;
        }

        ret |= val << (8 * byte);
    }

    return ret;
}
418

    
419
/*
 * Shift rs1 left by one byte and insert a packed byte into the low byte
 * of each 32-bit half.
 *
 * The scale factor is taken from bits 7..3 of gsr.  The result starts as
 * rs1 << 8 with bits 7..0 and 39..32 cleared.  Each signed 32-bit word w
 * of rs2 (0 = least significant) is multiplied by 2^scale, shifted right
 * arithmetically by 23 and clamped to 0..255; that byte is OR-ed into the
 * low byte of 32-bit word w of the result.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    int word;

    for (word = 0; word < 2; word++) {
        const int32_t src = (int32_t)(rs2 >> (word * 32));
        /*
         * Scale by multiplication: left-shifting a negative value is
         * undefined behaviour in C, whereas this product (at most 2^62 in
         * magnitude) always fits in 64 bits.
         */
        const int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        const int64_t from_fixed = scaled >> 23;
        uint64_t val;

        if (from_fixed < 0) {
            val = 0;
        } else if (from_fixed > 255) {
            val = 255;
        } else {
            val = from_fixed;
        }

        ret |= val << (32 * word);
    }

    return ret;
}
440

    
441
/*
 * Pack the two signed 32-bit fixed-point words of rs2 into two signed
 * 16-bit values.
 *
 * The scale factor is taken from bits 7..3 of gsr.  Each word w
 * (0 = least significant) is multiplied by 2^scale, shifted right
 * arithmetically by 16 and saturated to -32768..32767; the low 16 bits of
 * that value are placed in halfword w of the 32-bit return value.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        const int32_t src = (int32_t)(rs2 >> (word * 32));
        /*
         * The scaling must be carried out in 64 bits: done in 32 bits it
         * overflows for large inputs and the saturation below would then
         * never see the out-of-range value.  A multiplication is used
         * because left-shifting a negative value is undefined behaviour.
         */
        const int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        const int64_t from_fixed = scaled >> 16;
        uint32_t val;

        if (from_fixed < -32768) {
            val = -32768;
        } else if (from_fixed > 32767) {
            val = 32767;
        } else {
            val = from_fixed;
        }

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
461

    
462
/*
 * Select eight bytes out of the 16-byte concatenation of src1 and src2.
 *
 * The source bytes are numbered 0..15, where 0 is the most significant
 * byte of src1, 7 its least significant byte, 8 the most significant byte
 * of src2 and 15 the least significant byte of src2.  The selectors are
 * the eight nibbles of bits 63..32 of gsr: for i = 0..7 the nibble at
 * bits (31 - 4i)..(28 - 4i) of that word picks the source byte that is
 * stored in result byte i, where result byte 0 is the least significant.
 *
 * NOTE(review): with selectors 0x01234567 this returns src1 byte-reversed
 * rather than src1 itself -- confirm the result byte order against the
 * BSHUFFLE definition.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    const uint32_t selectors = (uint32_t)(gsr >> 32);
    uint64_t shuffled = 0;
    unsigned pos;

    for (pos = 0; pos < 8; pos++) {
        const unsigned sel = (selectors >> (28 - 4 * pos)) & 0xf;
        /* Selector bit 3 chooses the operand, bits 2..0 the byte in it. */
        const uint64_t source = (sel & 8) ? src2 : src1;
        const uint64_t picked = (source >> (8 * (7 - (sel & 7)))) & 0xff;

        shuffled |= picked << (8 * pos);
    }

    return shuffled;
}