dyngen.h @ b8076a74

/*
 * dyngen helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

int __op_param1, __op_param2, __op_param3;
int __op_gen_label1, __op_gen_label2, __op_gen_label3;
int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3;
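
/* These dummy globals are never used at run time: dyngen looks for
   relocations against __op_paramN, __op_gen_labelN and __op_jmpN in the
   compiled op_xxx functions and turns them into the patch points through
   which operands, generated labels and chained jumps are filled in when
   an op body is copied into the code buffer. */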

#ifdef __i386__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
}
#endif

#ifdef __x86_64__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
}
#endif

#ifdef __s390__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
}
#endif
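
/* The empty stubs above rely on the hardware keeping the instruction cache
   coherent with data stores (as i386, x86_64 and s390 do), so freshly
   generated code can be executed without an explicit flush. */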

#ifdef __ia64__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    /* "fc" flushes the cache line holding its operand from all cache
       levels; 32 bytes is a conservative minimum line size */
    while (start < stop) {
        asm volatile ("fc %0" :: "r"(start));
        start += 32;
    }
    asm volatile (";;sync.i;;srlz.i;;");
}
#endif

#ifdef __powerpc__

#define MIN_CACHE_LINE_SIZE 8 /* conservative value */

static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    unsigned long p;

    start &= ~(MIN_CACHE_LINE_SIZE - 1);
    stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);

    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}
#endif
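
/* The sequence above is the standard PowerPC self-modifying-code protocol:
   dcbst writes the new code back from the data cache, sync waits for those
   stores, icbi discards stale instruction cache lines, and the final
   sync/isync pair prevents execution of already-prefetched stale
   instructions. */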

#ifdef __alpha__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    /* "imb" (instruction memory barrier) flushes the whole instruction
       cache, so the range is ignored */
    asm ("imb");
}
#endif

#ifdef __sparc__

static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    unsigned long p;

    /* "flush" operates on doubleword-aligned addresses, so round the
       range out to multiples of 8 bytes */
    p = start & ~(8UL - 1UL);
    stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL);

    for (; p < stop; p += 8)
        __asm__ __volatile__("flush\t%0" : : "r" (p));
}

#endif

#ifdef __arm__
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    register unsigned long _beg __asm ("a1") = start;
    register unsigned long _end __asm ("a2") = stop;
    register unsigned long _flg __asm ("a3") = 0;
    /* swi 0x9f0002 is the ARM Linux cacheflush system call */
    __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
}
#endif

#ifdef __mc68000
#include <asm/cachectl.h>
static inline void flush_icache_range(unsigned long start, unsigned long stop)
{
    /* Linux cacheflush(2): flush both caches line by line */
    cacheflush(start, FLUSH_SCOPE_LINE, FLUSH_CACHE_BOTH, stop - start + 16);
}
#endif

#ifdef __alpha__

register int gp asm("$29");

static inline void immediate_ldah(void *p, int val) {
    uint32_t *dest = p;
    /* add the carry that the matching lda's sign-extended low half
       will subtract */
    long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff;

    *dest &= ~0xffff;
    *dest |= high;
    *dest |= 31 << 16; /* base register $31 (zero) */
}
static inline void immediate_lda(void *dest, int val) {
    *(uint16_t *) dest = val;
}
void fix_bsr(void *p, int offset) {
    uint32_t *dest = p;
    *dest &= ~((1 << 21) - 1);
    *dest |= (offset >> 2) & ((1 << 21) - 1);
}

#endif /* __alpha__ */
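
/* Worked example for the ldah/lda pair above: to materialize 0x12348765,
   the lda keeps the low half 0x8765, which it sign-extends to -0x789b, so
   the ldah must be patched with 0x1234 + 1 = 0x1235 (the (val >> 15) & 1
   carry): 0x1235 << 16 plus -0x789b is again 0x12348765. */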

#ifdef __arm__

#define MAX_OP_SIZE (128 * 4) /* in bytes */
/* max size of the code that can be generated without calling arm_flush_ldr */
#define MAX_FRAG_SIZE (1024 * 4)
//#define MAX_FRAG_SIZE (135 * 4) /* for testing */

typedef struct LDREntry {
    uint8_t *ptr;       /* address of the ldr instruction to patch */
    uint32_t *data_ptr; /* constant it must load */
} LDREntry;

static LDREntry arm_ldr_table[1024];
static uint32_t arm_data_table[1024];

extern char exec_loop;

static inline void arm_reloc_pc24(uint32_t *ptr, uint32_t insn, int val)
{
    *ptr = (insn & ~0xffffff) | ((insn + ((val - (int)ptr) >> 2)) & 0xffffff);
}

static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr,
                              LDREntry *ldr_start, LDREntry *ldr_end,
                              uint32_t *data_start, uint32_t *data_end,
                              int gen_jmp)
{
    LDREntry *le;
    uint32_t *ptr;
    int offset, data_size, target;
    uint8_t *data_ptr;
    uint32_t insn;

    data_size = (uint8_t *)data_end - (uint8_t *)data_start;

    if (gen_jmp) {
        /* generate branch to skip the data */
        if (data_size == 0)
            return gen_code_ptr;
        target = (long)gen_code_ptr + data_size + 4;
        arm_reloc_pc24((uint32_t *)gen_code_ptr, 0xeafffffe, target);
        gen_code_ptr += 4;
    }

    /* copy the data */
    data_ptr = gen_code_ptr;
    memcpy(gen_code_ptr, data_start, data_size);
    gen_code_ptr += data_size;

    /* patch the ldr to point to the data */
    for (le = ldr_start; le < ldr_end; le++) {
        ptr = (uint32_t *)le->ptr;
        /* "- 8" because the ARM PC reads two instructions ahead */
        offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) +
                 (unsigned long)data_ptr -
                 (unsigned long)ptr - 8;
        insn = *ptr & ~(0xfff | 0x00800000);
        if (offset < 0) {
            offset = -offset;
        } else {
            insn |= 0x00800000; /* U bit: add the offset */
        }
        if (offset > 0xfff) {
            fprintf(stderr, "Error ldr offset\n");
            abort();
        }
        insn |= offset;
        *ptr = insn;
    }
    return gen_code_ptr;
}

#endif /* __arm__ */
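
/* Summary of the scheme above: generated ARM code loads 32-bit constants
   PC-relative from a constant pool.  dyngen records each such load in
   arm_ldr_table and the constants in arm_data_table; before a fragment
   outgrows the reach of a 12-bit ldr offset, arm_flush_ldr() dumps the
   pool into the code stream (optionally branching over it) and rewrites
   every recorded ldr to address its pooled constant. */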

#ifdef __ia64

/* Patch instruction with "val" where "mask" has 1 bits. */
static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val)
{
    uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16);
#   define insn_mask ((1UL << 41) - 1)
    unsigned long shift;

    b0 = b[0]; b1 = b[1];
    shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */
    if (shift >= 64) {
        m1 = mask << (shift - 64);
        v1 = val << (shift - 64);
    } else {
        m0 = mask << shift; m1 = mask >> (64 - shift);
        v0 = val << shift; v1 = val >> (64 - shift);
        b[0] = (b0 & ~m0) | (v0 & m0);
    }
    b[1] = (b1 & ~m1) | (v1 & m1);
}
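
/* Worked example: for the middle slot of a bundle, insn_addr % 16 == 1 and
   shift = 5 + 41 = 46, so the 41-bit slot spans bits 46..63 of b[0] and
   bits 0..22 of b[1]; mask and value are therefore split across the two
   64-bit halves of the 128-bit bundle. */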

static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val)
{
    ia64_patch(insn_addr,
               0x011ffffe000UL,
               (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
                | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
    ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
}

static inline void ia64_imm64 (void *insn, uint64_t val)
{
    /* Ignore the slot number of the relocation; GCC and Intel
       toolchains differed for some time on whether IMM64 relocs are
       against slot 1 (Intel) or slot 2 (GCC).  */
    uint64_t insn_addr = (uint64_t) insn & ~3UL;

    ia64_patch(insn_addr + 2,
               0x01fffefe000UL,
               (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
                | ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
                | ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
                | ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
                | ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */)
              );
    ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
}

static inline void ia64_imm60b (void *insn, uint64_t val)
{
    /* Ignore the slot number of the relocation; GCC and Intel
       toolchains differed for some time on whether IMM64 relocs are
       against slot 1 (Intel) or slot 2 (GCC).  */
    uint64_t insn_addr = (uint64_t) insn & ~3UL;

    if (val + ((uint64_t) 1 << 59) >= (1UL << 60))
        fprintf(stderr, "%s: value %ld out of IMM60 range\n",
                __FUNCTION__, (int64_t) val);
    ia64_patch_imm60(insn_addr + 2, val);
}
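
/* The range checks of the form "val + (1 << (N-1)) >= (1 << N)" used here
   (N = 60 above, 22 and 21 below) are the usual branch-free test that a
   signed value fits in N bits: adding the bias 2^(N-1) maps the valid
   range [-2^(N-1), 2^(N-1)) onto [0, 2^N). */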

static inline void ia64_imm22 (void *insn, uint64_t val)
{
    if (val + (1 << 21) >= (1 << 22))
        fprintf(stderr, "%s: value %li out of IMM22 range\n",
                __FUNCTION__, (int64_t)val);
    ia64_patch((uint64_t) insn, 0x01fffcfe000UL,
               (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
                | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
                | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
                | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
}

/* Like ia64_imm22(), but also clear bits 20-21.  For addl, this has
   the effect of turning "addl rX=imm22,rY" into "addl rX=imm22,r0". */
static inline void ia64_imm22_r0 (void *insn, uint64_t val)
{
    if (val + (1 << 21) >= (1 << 22))
        fprintf(stderr, "%s: value %li out of IMM22 range\n",
                __FUNCTION__, (int64_t)val);
    ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20),
               (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
                | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
                | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
                | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
}

static inline void ia64_imm21b (void *insn, uint64_t val)
{
    if (val + (1 << 20) >= (1 << 21))
        fprintf(stderr, "%s: value %li out of IMM21b range\n",
                __FUNCTION__, (int64_t)val);
    ia64_patch((uint64_t) insn, 0x11ffffe000UL,
               (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
                | ((val & 0x0fffffUL) << 13) /* bit  0 -> 13 */));
}

static inline void ia64_nop_b (void *insn)
{
    /* replace the whole 41-bit slot with "nop.b 0" (B-unit opcode 2) */
    ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37);
}

static inline void ia64_ldxmov(void *insn, uint64_t val)
{
    /* if the value fits in 22 bits, relax the load into an
       add-immediate style mov (major opcode 8) */
    if (val + (1 << 21) < (1 << 22))
        ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37);
}

static inline int ia64_patch_ltoff(void *insn, uint64_t val,
                                   int relaxable)
{
    /* if the addend fits in 22 bits, bypass the GOT entirely */
    if (relaxable && (val + (1 << 21) < (1 << 22))) {
        ia64_imm22_r0(insn, val);
        return 0;
    }
    return 1; /* caller must go through a GOT entry */
}

struct ia64_fixup {
    struct ia64_fixup *next;
    void *addr; /* address that needs to be patched */
    long value;
};

#define IA64_PLT(insn, plt_index)                          \
do {                                                       \
    struct ia64_fixup *fixup = alloca(sizeof(*fixup));     \
    fixup->next = plt_fixes;                               \
    plt_fixes = fixup;                                     \
    fixup->addr = (insn);                                  \
    fixup->value = (plt_index);                            \
    plt_offset[(plt_index)] = 1;                           \
} while (0)

#define IA64_LTOFF(insn, val, relaxable)                   \
do {                                                       \
    if (ia64_patch_ltoff(insn, val, relaxable)) {          \
        struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \
        fixup->next = ltoff_fixes;                         \
        ltoff_fixes = fixup;                               \
        fixup->addr = (insn);                              \
        fixup->value = (val);                              \
    }                                                      \
} while (0)
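
/* Both macros expect the expanding function to declare plt_fixes,
   ltoff_fixes and plt_offset in the enclosing scope.  The fixups are
   alloca()ed, so they live only until that function returns, i.e. just
   long enough for ia64_apply_fixes() below to consume them. */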

static inline void ia64_apply_fixes (uint8_t **gen_code_pp,
                                     struct ia64_fixup *ltoff_fixes,
                                     uint64_t gp,
                                     struct ia64_fixup *plt_fixes,
                                     int num_plts,
                                     unsigned long *plt_target,
                                     unsigned int *plt_offset)
{
    static const uint8_t plt_bundle[] = {
        0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; movl r1=GP */
        0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60,

        0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; brl IP */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0
    };
    uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start, *vp;
    struct ia64_fixup *fixup;
    unsigned int offset = 0;
    struct fdesc {
        long ip;
        long gp;
    } *fdesc;
    int i;

    if (plt_fixes) {
        plt_start = gen_code_ptr;

        /* emit one PLT bundle pair per helper that was referenced */
        for (i = 0; i < num_plts; ++i) {
            if (plt_offset[i]) {
                plt_offset[i] = offset;
                offset += sizeof(plt_bundle);

                fdesc = (struct fdesc *) plt_target[i];
                memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle));
                ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp);
                ia64_imm60b(gen_code_ptr + 0x12,
                            (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4);
                gen_code_ptr += sizeof(plt_bundle);
            }
        }

        /* branch targets are bundle-aligned, hence the >> 4 */
        for (fixup = plt_fixes; fixup; fixup = fixup->next)
            ia64_imm21b(fixup->addr,
                        ((long) plt_start + plt_offset[fixup->value]
                         - ((long) fixup->addr & ~0xf)) >> 4);
    }

    got_start = gen_code_ptr;

    /* First, create the GOT: */
    for (fixup = ltoff_fixes; fixup; fixup = fixup->next) {
        /* first check if we already have this value in the GOT: */
        for (vp = got_start; vp < gen_code_ptr; vp += 8) /* 8-byte entries */
            if (*(uint64_t *) vp == fixup->value)
                break;
        if (vp == gen_code_ptr) {
            /* Nope, we need to put the value in the GOT: */
            *(uint64_t *) vp = fixup->value;
            gen_code_ptr += 8;
        }
        ia64_imm22(fixup->addr, (long) vp - gp);
    }
    *gen_code_pp = gen_code_ptr;
}
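
/* ia64_apply_fixes() runs once per translated block: it first materializes
   a mini-PLT (one movl-gp/brl bundle pair per referenced helper, reached
   via the ia64_imm21b-patched branches), then builds a deduplicated GOT
   behind it and resolves each LTOFF fixup into a gp-relative ia64_imm22
   displacement. */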

#endif