root / xen-mapcache.c @ feature-archipelago
History | View | Annotate | Download (12.4 kB)
1 |
/*
|
---|---|
2 |
* Copyright (C) 2011 Citrix Ltd.
|
3 |
*
|
4 |
* This work is licensed under the terms of the GNU GPL, version 2. See
|
5 |
* the COPYING file in the top-level directory.
|
6 |
*
|
7 |
* Contributions after 2012-01-13 are licensed under the terms of the
|
8 |
* GNU GPL, version 2 or (at your option) any later version.
|
9 |
*/
|
10 |
|
11 |
#include "config.h" |
12 |
|
13 |
#include <sys/resource.h> |
14 |
|
15 |
#include "hw/xen/xen_backend.h" |
16 |
#include "sysemu/blockdev.h" |
17 |
#include "qemu/bitmap.h" |
18 |
|
19 |
#include <xen/hvm/params.h> |
20 |
#include <sys/mman.h> |
21 |
|
22 |
#include "sysemu/xen-mapcache.h" |
23 |
#include "trace.h" |
24 |
|
25 |
|
26 |
//#define MAPCACHE_DEBUG
|
27 |
|
28 |
/*
 * Debug output helper.  With MAPCACHE_DEBUG defined, DPRINTF() prints a
 * message prefixed with "xen_mapcache: " on stderr; without it the macro
 * expands to an empty statement.
 */
#if defined(MAPCACHE_DEBUG)
#  define DPRINTF(fmt, ...)                                         \
    do {                                                            \
        fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__);      \
    } while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif
|
35 |
|
36 |
/*
 * Map cache geometry, selected by host architecture.  A bucket spans
 * (1UL << MCACHE_BUCKET_SHIFT) bytes of guest physical address space and
 * MCACHE_MAX_SIZE is the upper bound used when sizing the cache.
 */
#ifdef __i386__
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL << 31) /* 2GB Cap */
#elif defined(__x86_64__)
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL << 35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
44 |
|
45 |
/*
 * Amount of virtual address space reserved for QEMU itself, which the
 * MapCache will not use.
 * Empirical tests showed that QEMU uses about 75MB more than
 * max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * 1024 * 1024)
51 |
|
52 |
/* Locking hooks around map cache teardown; both currently expand to no-ops. */
#define mapcache_lock()     ((void)0)
#define mapcache_unlock()   ((void)0)
54 |
|
55 |
typedef struct MapCacheEntry { |
56 |
hwaddr paddr_index; |
57 |
uint8_t *vaddr_base; |
58 |
unsigned long *valid_mapping; |
59 |
uint8_t lock; |
60 |
hwaddr size; |
61 |
struct MapCacheEntry *next;
|
62 |
} MapCacheEntry; |
63 |
|
64 |
typedef struct MapCacheRev { |
65 |
uint8_t *vaddr_req; |
66 |
hwaddr paddr_index; |
67 |
hwaddr size; |
68 |
QTAILQ_ENTRY(MapCacheRev) next; |
69 |
} MapCacheRev; |
70 |
|
71 |
typedef struct MapCache { |
72 |
MapCacheEntry *entry; |
73 |
unsigned long nr_buckets; |
74 |
QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; |
75 |
|
76 |
/* For most cases (>99.9%), the page address is the same. */
|
77 |
MapCacheEntry *last_entry; |
78 |
unsigned long max_mcache_size; |
79 |
unsigned int mcache_bucket_shift; |
80 |
|
81 |
phys_offset_to_gaddr_t phys_offset_to_gaddr; |
82 |
void *opaque;
|
83 |
} MapCache; |
84 |
|
85 |
static MapCache *mapcache;
|
86 |
|
87 |
/*
 * Check a run of bits in a bitmap.
 *
 * Returns 1 when no zero bit is found among the `size` bits starting at
 * bit `nr` of `addr`, and 0 otherwise.
 */
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long first_zero = find_next_zero_bit(addr, size + nr, nr);

    return first_zero >= nr + size;
}
95 |
|
96 |
/*
 * Allocate and initialise the global map cache.
 *
 * @f:      optional callback used by xen_map_cache() to translate an address
 *          when a lookup fails; stored as-is.
 * @opaque: argument handed back to @f.
 *
 * The cache size is capped at MCACHE_MAX_SIZE.  When not running as root
 * and the RLIMIT_AS hard limit is below MCACHE_MAX_SIZE plus
 * NON_MCACHE_MEMORY_SIZE, the cache is shrunk so that
 * NON_MCACHE_MEMORY_SIZE bytes stay available to the rest of QEMU.  The
 * RLIMIT_AS soft limit is then raised (to infinity for root, to the hard
 * limit otherwise).
 */
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;
    bool apply_rlimit = true;

    mapcache = g_malloc0(sizeof (MapCache));

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else if (getrlimit(RLIMIT_AS, &rlimit_as) != 0) {
        /*
         * The current limits are unknown: rlimit_as holds no valid data,
         * so neither size the cache from it nor feed it to setrlimit().
         */
        perror("getrlimit(RLIMIT_AS)");
        apply_rlimit = false;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            fprintf(stderr, "Warning: QEMU's maximum size of virtual"
                    " memory is not infinity.\n");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    /* Failing to raise the limit is reported but is not fatal. */
    if (apply_rlimit && setrlimit(RLIMIT_AS, &rlimit_as) != 0) {
        perror("setrlimit(RLIMIT_AS)");
    }

    /* Number of buckets needed to cover max_mcache_size, rounded up. */
    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    /* The head-entry array is rounded up to a whole number of pages. */
    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}
141 |
|
142 |
static void xen_remap_bucket(MapCacheEntry *entry, |
143 |
hwaddr size, |
144 |
hwaddr address_index) |
145 |
{ |
146 |
uint8_t *vaddr_base; |
147 |
xen_pfn_t *pfns; |
148 |
int *err;
|
149 |
unsigned int i; |
150 |
hwaddr nb_pfn = size >> XC_PAGE_SHIFT; |
151 |
|
152 |
trace_xen_remap_bucket(address_index); |
153 |
|
154 |
pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t));
|
155 |
err = g_malloc0(nb_pfn * sizeof (int)); |
156 |
|
157 |
if (entry->vaddr_base != NULL) { |
158 |
if (munmap(entry->vaddr_base, entry->size) != 0) { |
159 |
perror("unmap fails");
|
160 |
exit(-1);
|
161 |
} |
162 |
} |
163 |
if (entry->valid_mapping != NULL) { |
164 |
g_free(entry->valid_mapping); |
165 |
entry->valid_mapping = NULL;
|
166 |
} |
167 |
|
168 |
for (i = 0; i < nb_pfn; i++) { |
169 |
pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; |
170 |
} |
171 |
|
172 |
vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, |
173 |
pfns, err, nb_pfn); |
174 |
if (vaddr_base == NULL) { |
175 |
perror("xc_map_foreign_bulk");
|
176 |
exit(-1);
|
177 |
} |
178 |
|
179 |
entry->vaddr_base = vaddr_base; |
180 |
entry->paddr_index = address_index; |
181 |
entry->size = size; |
182 |
entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) * |
183 |
BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); |
184 |
|
185 |
bitmap_zero(entry->valid_mapping, nb_pfn); |
186 |
for (i = 0; i < nb_pfn; i++) { |
187 |
if (!err[i]) {
|
188 |
bitmap_set(entry->valid_mapping, i, 1);
|
189 |
} |
190 |
} |
191 |
|
192 |
g_free(pfns); |
193 |
g_free(err); |
194 |
} |
195 |
|
196 |
/*
 * Return a pointer in QEMU's address space for guest address @phys_addr.
 *
 * @phys_addr: guest physical address to map.
 * @size:      number of bytes required; 0 requests a single bucket.
 * @lock:      non-zero to take a locked mapping, which is recorded in
 *             mapcache->locked_entries and counted in the entry's lock
 *             field.
 *
 * Returns the mapped address, or NULL when the requested pages are not
 * validly mapped even after one retry through the phys_offset_to_gaddr
 * callback (if one is set).
 */
uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock)
{
    MapCacheEntry *entry, *pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size = size;
    bool translated = false;
    int first_page;
    int nr_pages;
    uint8_t *vaddr;

tryagain:
    address_index = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    /* Page range, relative to the bucket start, that must be valid. */
    first_page = address_offset >> XC_PAGE_SHIFT;
    nr_pages = test_bit_size >> XC_PAGE_SHIFT;

    /*
     * Fast path: an unlocked request with no explicit size that hits the
     * most recently used entry.
     */
    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !cache_size &&
        test_bits(first_page, nr_pages,
                  mapcache->last_entry->valid_mapping)) {
        vaddr = mapcache->last_entry->vaddr_base + address_offset;
        trace_xen_map_cache_return(vaddr);
        return vaddr;
    }

    /* cache_size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    /*
     * Walk the chain past locked, mapped entries that do not already
     * provide the requested range.
     */
    for (;;) {
        if (!entry || !entry->lock || !entry->vaddr_base) {
            break;
        }
        if (entry->paddr_index == address_index &&
            entry->size == cache_size &&
            test_bits(first_page, nr_pages, entry->valid_mapping)) {
            break;
        }
        pentry = entry;
        entry = entry->next;
    }

    if (!entry) {
        /* Every entry on the chain is locked elsewhere: append a new one. */
        entry = g_malloc0(sizeof (MapCacheEntry));
        pentry->next = entry;
        xen_remap_bucket(entry, cache_size, address_index);
    } else if (!entry->lock &&
               (!entry->vaddr_base ||
                entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(first_page, nr_pages, entry->valid_mapping))) {
        /* An unlocked entry that does not match is reused in place. */
        xen_remap_bucket(entry, cache_size, address_index);
    }

    if (!test_bits(first_page, nr_pages, entry->valid_mapping)) {
        mapcache->last_entry = NULL;
        if (!translated && mapcache->phys_offset_to_gaddr) {
            /* Retry once with the address translated by the callback. */
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size, mapcache->opaque);
            translated = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    vaddr = entry->vaddr_base + address_offset;

    if (lock) {
        /* Remember the returned pointer so it can be looked up and released. */
        MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));

        entry->lock++;
        reventry->vaddr_req = vaddr;
        reventry->paddr_index = entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(vaddr);
    return vaddr;
}
293 |
|
294 |
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
|
295 |
{ |
296 |
MapCacheEntry *entry = NULL;
|
297 |
MapCacheRev *reventry; |
298 |
hwaddr paddr_index; |
299 |
hwaddr size; |
300 |
int found = 0; |
301 |
|
302 |
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { |
303 |
if (reventry->vaddr_req == ptr) {
|
304 |
paddr_index = reventry->paddr_index; |
305 |
size = reventry->size; |
306 |
found = 1;
|
307 |
break;
|
308 |
} |
309 |
} |
310 |
if (!found) {
|
311 |
fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
|
312 |
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { |
313 |
DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, |
314 |
reventry->vaddr_req); |
315 |
} |
316 |
abort(); |
317 |
return 0; |
318 |
} |
319 |
|
320 |
entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; |
321 |
while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
|
322 |
entry = entry->next; |
323 |
} |
324 |
if (!entry) {
|
325 |
DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
|
326 |
return 0; |
327 |
} |
328 |
return (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
|
329 |
((unsigned long) ptr - (unsigned long) entry->vaddr_base); |
330 |
} |
331 |
|
332 |
void xen_invalidate_map_cache_entry(uint8_t *buffer)
|
333 |
{ |
334 |
MapCacheEntry *entry = NULL, *pentry = NULL; |
335 |
MapCacheRev *reventry; |
336 |
hwaddr paddr_index; |
337 |
hwaddr size; |
338 |
int found = 0; |
339 |
|
340 |
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { |
341 |
if (reventry->vaddr_req == buffer) {
|
342 |
paddr_index = reventry->paddr_index; |
343 |
size = reventry->size; |
344 |
found = 1;
|
345 |
break;
|
346 |
} |
347 |
} |
348 |
if (!found) {
|
349 |
DPRINTF("%s, could not find %p\n", __func__, buffer);
|
350 |
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { |
351 |
DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); |
352 |
} |
353 |
return;
|
354 |
} |
355 |
QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); |
356 |
g_free(reventry); |
357 |
|
358 |
if (mapcache->last_entry != NULL && |
359 |
mapcache->last_entry->paddr_index == paddr_index) { |
360 |
mapcache->last_entry = NULL;
|
361 |
} |
362 |
|
363 |
entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; |
364 |
while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
|
365 |
pentry = entry; |
366 |
entry = entry->next; |
367 |
} |
368 |
if (!entry) {
|
369 |
DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
|
370 |
return;
|
371 |
} |
372 |
entry->lock--; |
373 |
if (entry->lock > 0 || pentry == NULL) { |
374 |
return;
|
375 |
} |
376 |
|
377 |
pentry->next = entry->next; |
378 |
if (munmap(entry->vaddr_base, entry->size) != 0) { |
379 |
perror("unmap fails");
|
380 |
exit(-1);
|
381 |
} |
382 |
g_free(entry->valid_mapping); |
383 |
g_free(entry); |
384 |
} |
385 |
|
386 |
void xen_invalidate_map_cache(void) |
387 |
{ |
388 |
unsigned long i; |
389 |
MapCacheRev *reventry; |
390 |
|
391 |
/* Flush pending AIO before destroying the mapcache */
|
392 |
bdrv_drain_all(); |
393 |
|
394 |
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { |
395 |
DPRINTF("There should be no locked mappings at this time, "
|
396 |
"but "TARGET_FMT_plx" -> %p is present\n", |
397 |
reventry->paddr_index, reventry->vaddr_req); |
398 |
} |
399 |
|
400 |
mapcache_lock(); |
401 |
|
402 |
for (i = 0; i < mapcache->nr_buckets; i++) { |
403 |
MapCacheEntry *entry = &mapcache->entry[i]; |
404 |
|
405 |
if (entry->vaddr_base == NULL) { |
406 |
continue;
|
407 |
} |
408 |
if (entry->lock > 0) { |
409 |
continue;
|
410 |
} |
411 |
|
412 |
if (munmap(entry->vaddr_base, entry->size) != 0) { |
413 |
perror("unmap fails");
|
414 |
exit(-1);
|
415 |
} |
416 |
|
417 |
entry->paddr_index = 0;
|
418 |
entry->vaddr_base = NULL;
|
419 |
entry->size = 0;
|
420 |
g_free(entry->valid_mapping); |
421 |
entry->valid_mapping = NULL;
|
422 |
} |
423 |
|
424 |
mapcache->last_entry = NULL;
|
425 |
|
426 |
mapcache_unlock(); |
427 |
} |