Revision e494ead5 host-utils.c
b/host-utils.c | ||
---|---|---|
1 | 1 |
/* |
2 | 2 |
* Utility compute operations used by translated code. |
3 | 3 |
* |
4 |
* Copyright (c) 2003 Fabrice Bellard |
|
4 | 5 |
* Copyright (c) 2007 Aurelien Jarno |
5 | 6 |
* |
6 | 7 |
* Permission is hereby granted, free of charge, to any person obtaining a copy |
... | ... | |
24 | 25 |
|
25 | 26 |
#include "vl.h" |
26 | 27 |
|
27 |
/* Signed 64x64 -> 128 multiplication */ |
|
28 |
/* Long integer helpers */ |
|
29 |
/*
 * Add the 128-bit quantity (b:a) to the 128-bit accumulator (*phigh:*plow).
 *
 * plow, phigh: low and high 64-bit halves of the accumulator, updated in place.
 * a:           low 64 bits of the addend.
 * b:           high 64 bits of the addend.
 *
 * A carry out of the high half is discarded, i.e. the sum is taken
 * modulo 2^128.
 */
static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
{
    *plow += a;
    /* Unsigned addition wrapped iff the result is smaller than the addend. */
    if (*plow < a) {
        (*phigh)++;
    }
    *phigh += b;
}
|
28 | 37 |
|
29 |
void muls64(int64_t *phigh, int64_t *plow, int64_t a, int64_t b)
|
|
38 |
/*
 * Negate, in place, the 128-bit two's complement value (*phigh:*plow).
 *
 * plow, phigh: low and high 64-bit halves of the value to negate.
 */
static void neg128 (uint64_t *plow, uint64_t *phigh)
{
    /* Two's complement negation: invert every bit, then add one. */
    *plow = ~*plow;
    *phigh = ~*phigh;
    add128(plow, phigh, 1, 0);
}
|
39 | 44 |
|
40 |
pl = (uint64_t)((uint32_t)a) * (uint64_t)((uint32_t)b);
|
|
41 |
pm1 = (a >> 32) * (uint32_t)b;
|
|
42 |
pm2 = (uint32_t)a * (b >> 32);
|
|
43 |
ph = (a >> 32) * (b >> 32);
|
|
45 |
/*
 * Portable unsigned 64x64 -> 128 multiplication.
 *
 * Each operand is split into 32-bit halves and the four 32x32 -> 64
 * partial products are accumulated into (*phigh:*plow).
 *
 * plow, phigh: receive the low and high 64 bits of a * b.
 * a, b:        the unsigned factors.
 */
static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
{
    uint32_t a0, a1, b0, b1;
    uint64_t v;

    /* Explicit casts: keep only the low 32 bits in a0 / b0. */
    a0 = (uint32_t)a;
    a1 = (uint32_t)(a >> 32);

    b0 = (uint32_t)b;
    b1 = (uint32_t)(b >> 32);

    /* a0 * b0 has weight 2^0: it seeds the low half of the result. */
    v = (uint64_t)a0 * (uint64_t)b0;
    *plow = v;
    *phigh = 0;

    /* The two cross products have weight 2^32: they straddle both halves. */
    v = (uint64_t)a0 * (uint64_t)b1;
    add128(plow, phigh, v << 32, v >> 32);

    v = (uint64_t)a1 * (uint64_t)b0;
    add128(plow, phigh, v << 32, v >> 32);

    /* a1 * b1 has weight 2^64: it only affects the high half. */
    v = (uint64_t)a1 * (uint64_t)b1;
    *phigh += v;
}
53 | 69 |
|
70 |
|
|
54 | 71 |
/*
 * Unsigned 64x64 -> 128 multiplication.
 *
 * plow, phigh: receive the low and high 64 bits of a * b.
 * a, b:        the unsigned factors.
 *
 * On x86_64 a single "mul" instruction is used; elsewhere the work is
 * delegated to mul64().
 */
void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
{
#if defined(__x86_64__)
    /* a is loaded in rax, b in rdx; the product comes back in rdx:rax. */
    __asm__ ("mul %0\n\t"
             : "=d" (*phigh), "=a" (*plow)
             : "a" (a), "0" (b));
#else
    mul64(plow, phigh, a, b);
#endif
#if defined(DEBUG_MULDIV)
    /* %llx expects unsigned long long; uint64_t may be unsigned long. */
    printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
           (unsigned long long)a, (unsigned long long)b,
           (unsigned long long)*phigh, (unsigned long long)*plow);
#endif
}
|
69 | 86 |
|
70 |
ph += pm1 >> 32; |
|
71 |
ph += pm2 >> 32; |
|
72 |
pm1 = (uint64_t)((uint32_t)pm1) + (uint64_t)((uint32_t)pm2) + (pl >> 32); |
|
87 |
/*
 * Signed 64x64 -> 128 multiplication.
 *
 * plow, phigh: receive the low and high 64 bits of the two's complement
 *              128-bit product a * b.
 * a, b:        the signed factors.
 *
 * On x86_64 a single "imul" instruction is used; elsewhere the product of
 * the magnitudes is computed with mul64() and negated with neg128() when
 * the operand signs differ.
 */
void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
{
#if defined(__x86_64__)
    /* a is loaded in rax, b in rdx; the product comes back in rdx:rax. */
    __asm__ ("imul %0\n\t"
             : "=d" (*phigh), "=a" (*plow)
             : "a" (a), "0" (b));
#else
    int sa = (a < 0);
    int sb = (b < 0);
    /*
     * Take the magnitudes in unsigned arithmetic: negating INT64_MIN as a
     * signed value overflows (undefined behaviour), whereas the unsigned
     * negation wraps to the correct magnitude 2^63.  Using separate
     * variables also leaves a and b intact for the debug trace below.
     */
    uint64_t ua = sa ? -(uint64_t)a : (uint64_t)a;
    uint64_t ub = sb ? -(uint64_t)b : (uint64_t)b;

    mul64(plow, phigh, ua, ub);
    if (sa ^ sb) {
        neg128(plow, phigh);
    }
#endif
#if defined(DEBUG_MULDIV)
    /* %llx expects unsigned long long; the 64-bit types may be long. */
    printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
           (unsigned long long)a, (unsigned long long)b,
           (unsigned long long)*phigh, (unsigned long long)*plow);
#endif
}
Also available in: Unified diff