root / target-sparc / vis_helper.c @ 0834c9ea
History | View | Annotate | Download (14.8 kB)
1 |
/*
|
---|---|
2 |
* VIS op helpers
|
3 |
*
|
4 |
* Copyright (c) 2003-2005 Fabrice Bellard
|
5 |
*
|
6 |
* This library is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU Lesser General Public
|
8 |
* License as published by the Free Software Foundation; either
|
9 |
* version 2 of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This library is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
* Lesser General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU Lesser General Public
|
17 |
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
18 |
*/
|
19 |
|
20 |
#include "cpu.h" |
21 |
#include "helper.h" |
22 |
|
23 |
/* This function uses non-native bit order */
/* Extract bits FROM..TO (inclusive) of X, numbering bit 0 as the MOST
   significant bit of the 64-bit value (big-endian bit numbering).  */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
/* Same extraction with the SPARC-manual numbering: bit 0 is the LEAST
   significant bit; implemented by mirroring the indices into GET_FIELD.  */
#define GET_FIELD_SP(X, FROM, TO)                               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
|
31 |
/*
 * ARRAY8 helper: repack the coordinate fields of pixel_addr into a
 * blocked memory offset.  cubesize widens the variable-length middle
 * fields (bits 39.. and 17..) and the shift applied to the topmost
 * field.  The fixed fields (60-63, 56-59, 35-38, 33-34, bit 55, 13-16,
 * 11-12) are interleaved into the low bits of the result exactly as
 * written below.
 * NOTE(review): field positions appear to follow the UltraSPARC ARRAY8
 * instruction definition — confirm against the VIS manual before
 * changing any of the shift amounts.
 */
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}
43 |
|
44 |
/* Lane-accessor macros for the VIS64/VIS32 overlay unions below.
   VIS_B64(n) / VIS_W64(n) / VIS_SW64(n) / VIS_L64(n) select the n-th
   least-significant byte / uint16 / int16 / uint32 of a VIS64, and
   VIS_B32(n) / VIS_W32(n) the n-th least-significant byte / uint16 of
   a VIS32, independent of host byte order: on big-endian hosts the
   array index is mirrored so element 0 is still the low-order lane.  */
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

/* 64-bit VIS operand viewed as bytes, (signed) words, longs or float.  */
typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

/* 32-bit VIS operand viewed as bytes, words or float.  */
typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;
75 |
|
76 |
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) |
77 |
{ |
78 |
VIS64 s, d; |
79 |
|
80 |
s.ll = src1; |
81 |
d.ll = src2; |
82 |
|
83 |
/* Reverse calculation order to handle overlap */
|
84 |
d.VIS_B64(7) = s.VIS_B64(3); |
85 |
d.VIS_B64(6) = d.VIS_B64(3); |
86 |
d.VIS_B64(5) = s.VIS_B64(2); |
87 |
d.VIS_B64(4) = d.VIS_B64(2); |
88 |
d.VIS_B64(3) = s.VIS_B64(1); |
89 |
d.VIS_B64(2) = d.VIS_B64(1); |
90 |
d.VIS_B64(1) = s.VIS_B64(0); |
91 |
/* d.VIS_B64(0) = d.VIS_B64(0); */
|
92 |
|
93 |
return d.ll;
|
94 |
} |
95 |
|
96 |
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) |
97 |
{ |
98 |
VIS64 s, d; |
99 |
uint32_t tmp; |
100 |
|
101 |
s.ll = src1; |
102 |
d.ll = src2; |
103 |
|
104 |
#define PMUL(r) \
|
105 |
tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ |
106 |
if ((tmp & 0xff) > 0x7f) { \ |
107 |
tmp += 0x100; \
|
108 |
} \ |
109 |
d.VIS_W64(r) = tmp >> 8;
|
110 |
|
111 |
PMUL(0);
|
112 |
PMUL(1);
|
113 |
PMUL(2);
|
114 |
PMUL(3);
|
115 |
#undef PMUL
|
116 |
|
117 |
return d.ll;
|
118 |
} |
119 |
|
120 |
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) |
121 |
{ |
122 |
VIS64 s, d; |
123 |
uint32_t tmp; |
124 |
|
125 |
s.ll = src1; |
126 |
d.ll = src2; |
127 |
|
128 |
#define PMUL(r) \
|
129 |
tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \
|
130 |
if ((tmp & 0xff) > 0x7f) { \ |
131 |
tmp += 0x100; \
|
132 |
} \ |
133 |
d.VIS_W64(r) = tmp >> 8;
|
134 |
|
135 |
PMUL(0);
|
136 |
PMUL(1);
|
137 |
PMUL(2);
|
138 |
PMUL(3);
|
139 |
#undef PMUL
|
140 |
|
141 |
return d.ll;
|
142 |
} |
143 |
|
144 |
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) |
145 |
{ |
146 |
VIS64 s, d; |
147 |
uint32_t tmp; |
148 |
|
149 |
s.ll = src1; |
150 |
d.ll = src2; |
151 |
|
152 |
#define PMUL(r) \
|
153 |
tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \
|
154 |
if ((tmp & 0xff) > 0x7f) { \ |
155 |
tmp += 0x100; \
|
156 |
} \ |
157 |
d.VIS_W64(r) = tmp >> 8;
|
158 |
|
159 |
PMUL(0);
|
160 |
PMUL(1);
|
161 |
PMUL(2);
|
162 |
PMUL(3);
|
163 |
#undef PMUL
|
164 |
|
165 |
return d.ll;
|
166 |
} |
167 |
|
168 |
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) |
169 |
{ |
170 |
VIS64 s, d; |
171 |
uint32_t tmp; |
172 |
|
173 |
s.ll = src1; |
174 |
d.ll = src2; |
175 |
|
176 |
#define PMUL(r) \
|
177 |
tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
|
178 |
if ((tmp & 0xff) > 0x7f) { \ |
179 |
tmp += 0x100; \
|
180 |
} \ |
181 |
d.VIS_W64(r) = tmp >> 8;
|
182 |
|
183 |
PMUL(0);
|
184 |
PMUL(1);
|
185 |
PMUL(2);
|
186 |
PMUL(3);
|
187 |
#undef PMUL
|
188 |
|
189 |
return d.ll;
|
190 |
} |
191 |
|
192 |
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) |
193 |
{ |
194 |
VIS64 s, d; |
195 |
uint32_t tmp; |
196 |
|
197 |
s.ll = src1; |
198 |
d.ll = src2; |
199 |
|
200 |
#define PMUL(r) \
|
201 |
tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
|
202 |
if ((tmp & 0xff) > 0x7f) { \ |
203 |
tmp += 0x100; \
|
204 |
} \ |
205 |
d.VIS_W64(r) = tmp >> 8;
|
206 |
|
207 |
PMUL(0);
|
208 |
PMUL(1);
|
209 |
PMUL(2);
|
210 |
PMUL(3);
|
211 |
#undef PMUL
|
212 |
|
213 |
return d.ll;
|
214 |
} |
215 |
|
216 |
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) |
217 |
{ |
218 |
VIS64 s, d; |
219 |
uint32_t tmp; |
220 |
|
221 |
s.ll = src1; |
222 |
d.ll = src2; |
223 |
|
224 |
#define PMUL(r) \
|
225 |
tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
|
226 |
if ((tmp & 0xff) > 0x7f) { \ |
227 |
tmp += 0x100; \
|
228 |
} \ |
229 |
d.VIS_L64(r) = tmp; |
230 |
|
231 |
/* Reverse calculation order to handle overlap */
|
232 |
PMUL(1);
|
233 |
PMUL(0);
|
234 |
#undef PMUL
|
235 |
|
236 |
return d.ll;
|
237 |
} |
238 |
|
239 |
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) |
240 |
{ |
241 |
VIS64 s, d; |
242 |
uint32_t tmp; |
243 |
|
244 |
s.ll = src1; |
245 |
d.ll = src2; |
246 |
|
247 |
#define PMUL(r) \
|
248 |
tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
|
249 |
if ((tmp & 0xff) > 0x7f) { \ |
250 |
tmp += 0x100; \
|
251 |
} \ |
252 |
d.VIS_L64(r) = tmp; |
253 |
|
254 |
/* Reverse calculation order to handle overlap */
|
255 |
PMUL(1);
|
256 |
PMUL(0);
|
257 |
#undef PMUL
|
258 |
|
259 |
return d.ll;
|
260 |
} |
261 |
|
262 |
uint64_t helper_fexpand(uint64_t src1, uint64_t src2) |
263 |
{ |
264 |
VIS32 s; |
265 |
VIS64 d; |
266 |
|
267 |
s.l = (uint32_t)src1; |
268 |
d.ll = src2; |
269 |
d.VIS_W64(0) = s.VIS_B32(0) << 4; |
270 |
d.VIS_W64(1) = s.VIS_B32(1) << 4; |
271 |
d.VIS_W64(2) = s.VIS_B32(2) << 4; |
272 |
d.VIS_W64(3) = s.VIS_B32(3) << 4; |
273 |
|
274 |
return d.ll;
|
275 |
} |
276 |
|
277 |
/*
 * Generate the four partitioned arithmetic helpers for operator F:
 *   name##16  - four 16-bit lanes in a 64-bit operand
 *   name##16s - two 16-bit lanes in a 32-bit operand
 *   name##32  - two 32-bit lanes in a 64-bit operand
 *   name##32s - a single 32-bit value
 * Lanes are fixed-width unsigned, so FADD/FSUB wrap modulo the lane
 * size.  Result lanes only depend on the same lane of each source, so
 * no ordering care is needed.
 */
#define VIS_HELPER(name, F)                                 \
    uint64_t name##16(uint64_t src1, uint64_t src2)         \
    {                                                       \
        VIS64 s, d;                                         \
                                                            \
        s.ll = src1;                                        \
        d.ll = src2;                                        \
                                                            \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));       \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));       \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));       \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));       \
                                                            \
        return d.ll;                                        \
    }                                                       \
                                                            \
    uint32_t name##16s(uint32_t src1, uint32_t src2)        \
    {                                                       \
        VIS32 s, d;                                         \
                                                            \
        s.l = src1;                                         \
        d.l = src2;                                         \
                                                            \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));       \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));       \
                                                            \
        return d.l;                                         \
    }                                                       \
                                                            \
    uint64_t name##32(uint64_t src1, uint64_t src2)         \
    {                                                       \
        VIS64 s, d;                                         \
                                                            \
        s.ll = src1;                                        \
        d.ll = src2;                                        \
                                                            \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));       \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));       \
                                                            \
        return d.ll;                                        \
    }                                                       \
                                                            \
    uint32_t name##32s(uint32_t src1, uint32_t src2)        \
    {                                                       \
        VIS32 s, d;                                         \
                                                            \
        s.l = src1;                                         \
        d.l = src2;                                         \
                                                            \
        d.l = F(d.l, s.l);                                  \
                                                            \
        return d.l;                                         \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
/* helper_fpadd16/16s/32/32s and helper_fpsub16/16s/32/32s */
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)
335 |
|
336 |
/*
 * Generate partitioned compare helpers for predicate F:
 *   name##16 - compare four 16-bit lanes, name##32 - two 32-bit lanes.
 * Lane i contributes bit i (1/2/4/8) to a mask accumulated in the
 * least-significant lane of the result; every other lane is cleared.
 * Note the argument order: F(src1 lane, src2 lane).
 */
#define VIS_CMPHELPER(name, F)                                        \
    uint64_t name##16(uint64_t src1, uint64_t src2)                   \
    {                                                                 \
        VIS64 s, d;                                                   \
                                                                      \
        s.ll = src1;                                                  \
        d.ll = src2;                                                  \
                                                                      \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;         \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;        \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;        \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;        \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;               \
                                                                      \
        return d.ll;                                                  \
    }                                                                 \
                                                                      \
    uint64_t name##32(uint64_t src1, uint64_t src2)                   \
    {                                                                 \
        VIS64 s, d;                                                   \
                                                                      \
        s.ll = src1;                                                  \
        d.ll = src2;                                                  \
                                                                      \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;         \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;        \
        d.VIS_L64(1) = 0;                                             \
                                                                      \
        return d.ll;                                                  \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

/* helper_fcmp{gt,eq,le,ne}{16,32} */
VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
376 |
|
377 |
/* PDIST: add to the running sum the absolute difference of each of the
   eight byte pairs of src1 and src2 (sum of absolute differences).  */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    for (shift = 0; shift <= 56; shift += 8) {
        int a = (src1 >> shift) & 0xff;
        int b = (src2 >> shift) & 0xff;

        /* |a - b| without a branch on the sign of the difference.  */
        sum += (a > b) ? (a - b) : (b - a);
    }

    return sum;
}
397 |
|
398 |
/* FPACK16: pack four signed 16-bit fixed-point lanes of rs2 into four
   unsigned bytes.  Each lane is left-shifted by GSR.scale (bits 6:3 of
   gsr, max 15), truncated at the implicit binary point (>> 7), and
   clamped to [0, 255].  */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t result = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int16_t pixel = rs2 >> (i * 16);
        int32_t fixed = ((int32_t)pixel << scale) >> 7;
        uint32_t clamped;

        if (fixed < 0) {
            clamped = 0;
        } else if (fixed > 255) {
            clamped = 255;
        } else {
            clamped = fixed;
        }

        result |= clamped << (8 * i);
    }

    return result;
}
418 |
|
419 |
/* FPACK32: shift rs1 left one byte (dropping bytes 0 and 4 of the
   result), then pack the two signed 32-bit lanes of rs2 — scaled by
   GSR.scale (bits 7:3, max 31), truncated (>> 23) and clamped to
   [0, 255] — into the freed bytes 0 and 4.  */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t result;
    int i;

    result = (rs1 << 8) & ~(0x000000ff000000ffULL);

    for (i = 0; i < 2; i++) {
        int32_t pixel = rs2 >> (i * 32);
        /* Widen before shifting: scale may be up to 31 bits.  */
        int64_t fixed = ((int64_t)pixel << scale) >> 23;
        uint64_t clamped;

        if (fixed < 0) {
            clamped = 0;
        } else if (fixed > 255) {
            clamped = 255;
        } else {
            clamped = fixed;
        }

        result |= clamped << (32 * i);
    }

    return result;
}
440 |
|
441 |
/* FPACKFIX: pack the two signed 32-bit lanes of rs2 into two signed
   16-bit values.  Each lane is left-shifted by GSR.scale (bits 7:3,
   max 31), truncated at the fixed point (>> 16) and clamped to
   [-32768, 32767].  */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        /* Widen before shifting: scale can be up to 31, and shifting a
         * 32-bit signed value that far (or left-shifting a negative
         * value) is undefined behavior and loses the high-order bits
         * the clamp below depends on.  Matches helper_fpack32.  */
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
461 |
|
462 |
/*
 * BSHUFFLE: permute the 16 bytes of the concatenation src1:src2
 * according to the mask held in the upper 32 bits of gsr.  The 4-bit
 * field (mask >> (28 - 4*i)) & 0xf selects which source byte becomes
 * element i of the result.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes.  */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    /* GSR.mask occupies bits 63:32 of the gsr value.  */
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        /* XOR with HOST maps the selector's target byte numbering onto
           a host-order index into s.b[] (identity on big-endian hosts,
           mirror on little-endian ones).  */
        unsigned e = (mask >> (28 - i*4)) & 0xf;
        r.VIS_B64(i) = s.b[e ^ host];
    }

    return r.ll;
}