root / drivers / block-cache.c @ abdb293f
History | View | Annotate | Download (18 kB)
1 |
/*
|
---|---|
2 |
* Copyright (c) 2008, XenSource Inc.
|
3 |
* All rights reserved.
|
4 |
*
|
5 |
* Redistribution and use in source and binary forms, with or without
|
6 |
* modification, are permitted provided that the following conditions are met:
|
7 |
* * Redistributions of source code must retain the above copyright
|
8 |
* notice, this list of conditions and the following disclaimer.
|
9 |
* * Redistributions in binary form must reproduce the above copyright
|
10 |
* notice, this list of conditions and the following disclaimer in the
|
11 |
* documentation and/or other materials provided with the distribution.
|
12 |
* * Neither the name of XenSource Inc. nor the names of its contributors
|
13 |
* may be used to endorse or promote products derived from this software
|
14 |
* without specific prior written permission.
|
15 |
*
|
16 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17 |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
18 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
19 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
20 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
21 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
22 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
23 |
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
24 |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
25 |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
26 |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27 |
*/
|
28 |
|
29 |
#ifdef HAVE_CONFIG_H
|
30 |
#include "config.h" |
31 |
#endif
|
32 |
|
33 |
#include <errno.h> |
34 |
#include <fcntl.h> |
35 |
#include <unistd.h> |
36 |
#include <stdlib.h> |
37 |
#include <sys/mman.h> |
38 |
|
39 |
#include "tapdisk.h" |
40 |
#include "tapdisk-utils.h" |
41 |
#include "tapdisk-driver.h" |
42 |
#include "tapdisk-server.h" |
43 |
#include "tapdisk-interface.h" |
44 |
|
45 |
#ifdef DEBUG
|
46 |
#define DBG(_f, _a...) tlog_write(TLOG_DBG, _f, ##_a) |
47 |
#else
|
48 |
#define DBG(_f, _a...) ((void)0) |
49 |
#endif
|
50 |
|
51 |
#define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a) |
52 |
|
53 |
#define RADIX_TREE_PAGE_SHIFT 12 /* 4K pages */ |
54 |
#define RADIX_TREE_PAGE_SIZE (1 << RADIX_TREE_PAGE_SHIFT) |
55 |
|
56 |
#define RADIX_TREE_NODE_SHIFT 9 /* 512B nodes */ |
57 |
#define RADIX_TREE_NODE_SIZE (1 << RADIX_TREE_NODE_SHIFT) |
58 |
#define RADIX_TREE_NODE_MASK (RADIX_TREE_NODE_SIZE - 1) |
59 |
|
60 |
#define BLOCK_CACHE_NODES_PER_PAGE (1 << (RADIX_TREE_PAGE_SHIFT - RADIX_TREE_NODE_SHIFT)) |
61 |
|
62 |
#define BLOCK_CACHE_MAX_SIZE (10 << 20) /* 100MB cache */ |
63 |
#define BLOCK_CACHE_REQUESTS (TAPDISK_DATA_REQUESTS << 3) |
64 |
#define BLOCK_CACHE_PAGE_IDLETIME 60 |
65 |
|
66 |
typedef struct radix_tree radix_tree_t; |
67 |
typedef struct radix_tree_node radix_tree_node_t; |
68 |
typedef struct radix_tree_link radix_tree_link_t; |
69 |
typedef struct radix_tree_leaf radix_tree_leaf_t; |
70 |
typedef struct radix_tree_page radix_tree_page_t; |
71 |
|
72 |
typedef struct block_cache block_cache_t; |
73 |
typedef struct block_cache_request block_cache_request_t; |
74 |
typedef struct block_cache_stats block_cache_stats_t; |
75 |
|
76 |
struct radix_tree_page {
|
77 |
char *buf;
|
78 |
size_t size; |
79 |
uint64_t sec; |
80 |
radix_tree_link_t *owners[BLOCK_CACHE_NODES_PER_PAGE]; |
81 |
}; |
82 |
|
83 |
struct radix_tree_leaf {
|
84 |
radix_tree_page_t *page; |
85 |
char *buf;
|
86 |
}; |
87 |
|
88 |
struct radix_tree_link {
|
89 |
uint32_t time; |
90 |
union {
|
91 |
radix_tree_node_t *next; |
92 |
radix_tree_leaf_t leaf; |
93 |
} u; |
94 |
}; |
95 |
|
96 |
struct radix_tree_node {
|
97 |
int height;
|
98 |
radix_tree_link_t links[RADIX_TREE_NODE_SIZE]; |
99 |
}; |
100 |
|
101 |
struct radix_tree {
|
102 |
int height;
|
103 |
uint64_t size; |
104 |
uint32_t nodes; |
105 |
radix_tree_node_t *root; |
106 |
|
107 |
block_cache_t *cache; |
108 |
}; |
109 |
|
110 |
struct block_cache_request {
|
111 |
int err;
|
112 |
char *buf;
|
113 |
uint64_t secs; |
114 |
td_request_t treq; |
115 |
block_cache_t *cache; |
116 |
}; |
117 |
|
118 |
struct block_cache_stats {
|
119 |
uint64_t reads; |
120 |
uint64_t hits; |
121 |
uint64_t misses; |
122 |
uint64_t prunes; |
123 |
}; |
124 |
|
125 |
struct block_cache {
|
126 |
int ptype;
|
127 |
char *name;
|
128 |
|
129 |
uint64_t sectors; |
130 |
|
131 |
block_cache_request_t requests[BLOCK_CACHE_REQUESTS]; |
132 |
block_cache_request_t *request_free_list[BLOCK_CACHE_REQUESTS]; |
133 |
int requests_free;
|
134 |
|
135 |
event_id_t timeout_id; |
136 |
|
137 |
radix_tree_t tree; |
138 |
|
139 |
block_cache_stats_t stats; |
140 |
}; |
141 |
|
142 |
static inline uint64_t |
143 |
radix_tree_calculate_size(int height)
|
144 |
{ |
145 |
return (uint64_t)RADIX_TREE_NODE_SIZE <<
|
146 |
(height * RADIX_TREE_NODE_SHIFT); |
147 |
} |
148 |
|
149 |
static inline int |
150 |
radix_tree_calculate_height(uint64_t sectors) |
151 |
{ |
152 |
int height;
|
153 |
uint64_t tree_size; |
154 |
|
155 |
height = 1; /* always allocate root node */ |
156 |
tree_size = radix_tree_calculate_size(height); |
157 |
while (sectors > tree_size)
|
158 |
tree_size = radix_tree_calculate_size(++height); |
159 |
|
160 |
return height;
|
161 |
} |
162 |
|
163 |
static inline int |
164 |
radix_tree_index(radix_tree_node_t *node, uint64_t sector) |
165 |
{ |
166 |
return ((sector >> (node->height * RADIX_TREE_NODE_SHIFT)) &
|
167 |
RADIX_TREE_NODE_MASK); |
168 |
} |
169 |
|
170 |
static inline int |
171 |
radix_tree_node_contains_leaves(radix_tree_t *tree, radix_tree_node_t *node) |
172 |
{ |
173 |
return (node->height == 0); |
174 |
} |
175 |
|
176 |
static inline int |
177 |
radix_tree_node_is_root(radix_tree_t *tree, radix_tree_node_t *node) |
178 |
{ |
179 |
return (node->height == tree->height);
|
180 |
} |
181 |
|
182 |
static inline uint64_t |
183 |
radix_tree_size(radix_tree_t *tree) |
184 |
{ |
185 |
return tree->size + tree->nodes * sizeof(radix_tree_node_t); |
186 |
} |
187 |
|
188 |
static inline void |
189 |
radix_tree_clear_link(radix_tree_link_t *link) |
190 |
{ |
191 |
if (link)
|
192 |
memset(link, 0, sizeof(radix_tree_link_t)); |
193 |
} |
194 |
|
195 |
static inline radix_tree_node_t * |
196 |
radix_tree_allocate_node(radix_tree_t *tree, int height)
|
197 |
{ |
198 |
radix_tree_node_t *node; |
199 |
|
200 |
node = calloc(1, sizeof(radix_tree_node_t)); |
201 |
if (!node)
|
202 |
return NULL; |
203 |
|
204 |
node->height = height; |
205 |
tree->nodes++; |
206 |
|
207 |
return node;
|
208 |
} |
209 |
|
210 |
static inline radix_tree_node_t * |
211 |
radix_tree_allocate_child_node(radix_tree_t *tree, radix_tree_node_t *parent) |
212 |
{ |
213 |
return radix_tree_allocate_node(tree, parent->height - 1); |
214 |
} |
215 |
|
216 |
void
|
217 |
radix_tree_free_node(radix_tree_t *tree, radix_tree_node_t *node) |
218 |
{ |
219 |
if (!node)
|
220 |
return;
|
221 |
|
222 |
free(node); |
223 |
tree->nodes--; |
224 |
} |
225 |
|
226 |
static inline radix_tree_page_t * |
227 |
radix_tree_allocate_page(radix_tree_t *tree, |
228 |
char *buf, uint64_t sec, size_t size)
|
229 |
{ |
230 |
radix_tree_page_t *page; |
231 |
|
232 |
page = calloc(1, sizeof(radix_tree_page_t)); |
233 |
if (!page)
|
234 |
return NULL; |
235 |
|
236 |
page->buf = buf; |
237 |
page->sec = sec; |
238 |
page->size = size; |
239 |
tree->size += size; |
240 |
|
241 |
return page;
|
242 |
} |
243 |
|
244 |
static inline void |
245 |
radix_tree_free_page(radix_tree_t *tree, radix_tree_page_t *page) |
246 |
{ |
247 |
int i;
|
248 |
|
249 |
for (i = 0; i < page->size >> RADIX_TREE_NODE_SHIFT; i++) |
250 |
DBG("%s: ejecting sector 0x%llx\n",
|
251 |
tree->cache->name, page->sec + i); |
252 |
|
253 |
tree->cache->stats.prunes += (page->size >> RADIX_TREE_NODE_SHIFT); |
254 |
tree->size -= page->size; |
255 |
free(page->buf); |
256 |
free(page); |
257 |
} |
258 |
|
259 |
/*
|
260 |
* remove a leaf and the shared radix_tree_page_t containing its buffer.
|
261 |
* leaves are deleted, nodes are not; gc will reap the nodes later.
|
262 |
*/
|
263 |
static void |
264 |
radix_tree_remove_page(radix_tree_t *tree, radix_tree_page_t *page) |
265 |
{ |
266 |
int i;
|
267 |
|
268 |
if (!page)
|
269 |
return;
|
270 |
|
271 |
for (i = 0; i < BLOCK_CACHE_NODES_PER_PAGE; i++) |
272 |
radix_tree_clear_link(page->owners[i]); |
273 |
|
274 |
radix_tree_free_page(tree, page); |
275 |
} |
276 |
|
277 |
static void |
278 |
radix_tree_insert_leaf(radix_tree_t *tree, radix_tree_link_t *link, |
279 |
radix_tree_page_t *page, off_t off) |
280 |
{ |
281 |
int i;
|
282 |
|
283 |
if (off + RADIX_TREE_NODE_SIZE > page->size)
|
284 |
return;
|
285 |
|
286 |
for (i = 0; i < BLOCK_CACHE_NODES_PER_PAGE; i++) { |
287 |
if (page->owners[i])
|
288 |
continue;
|
289 |
|
290 |
page->owners[i] = link; |
291 |
link->u.leaf.page = page; |
292 |
link->u.leaf.buf = page->buf + off; |
293 |
|
294 |
break;
|
295 |
} |
296 |
} |
297 |
|
298 |
static char * |
299 |
radix_tree_find_leaf(radix_tree_t *tree, uint64_t sector) |
300 |
{ |
301 |
int idx;
|
302 |
struct timeval now;
|
303 |
radix_tree_link_t *link; |
304 |
radix_tree_node_t *node; |
305 |
|
306 |
node = tree->root; |
307 |
gettimeofday(&now, NULL);
|
308 |
|
309 |
do {
|
310 |
idx = radix_tree_index(node, sector); |
311 |
link = node->links + idx; |
312 |
link->time = now.tv_sec; |
313 |
|
314 |
if (radix_tree_node_contains_leaves(tree, node))
|
315 |
return link->u.leaf.buf;
|
316 |
|
317 |
if (!link->u.next)
|
318 |
return NULL; |
319 |
|
320 |
node = link->u.next; |
321 |
} while (1); |
322 |
} |
323 |
|
324 |
static char * |
325 |
radix_tree_add_leaf(radix_tree_t *tree, uint64_t sector, |
326 |
radix_tree_page_t *page, off_t off) |
327 |
{ |
328 |
int idx;
|
329 |
struct timeval now;
|
330 |
radix_tree_link_t *link; |
331 |
radix_tree_node_t *node; |
332 |
|
333 |
node = tree->root; |
334 |
gettimeofday(&now, NULL);
|
335 |
|
336 |
do {
|
337 |
idx = radix_tree_index(node, sector); |
338 |
link = node->links + idx; |
339 |
link->time = now.tv_sec; |
340 |
|
341 |
if (radix_tree_node_contains_leaves(tree, node)) {
|
342 |
radix_tree_remove_page(tree, link->u.leaf.page); |
343 |
radix_tree_insert_leaf(tree, link, page, off); |
344 |
return link->u.leaf.buf;
|
345 |
} |
346 |
|
347 |
if (!link->u.next) {
|
348 |
link->u.next = radix_tree_allocate_child_node(tree, |
349 |
node); |
350 |
if (!link->u.next)
|
351 |
return NULL; |
352 |
} |
353 |
|
354 |
node = link->u.next; |
355 |
} while (1); |
356 |
} |
357 |
|
358 |
static int |
359 |
radix_tree_add_leaves(radix_tree_t *tree, char *buf,
|
360 |
uint64_t sector, uint64_t sectors) |
361 |
{ |
362 |
int i;
|
363 |
radix_tree_page_t *page; |
364 |
|
365 |
page = radix_tree_allocate_page(tree, buf, sector, |
366 |
sectors << RADIX_TREE_NODE_SHIFT); |
367 |
if (!page)
|
368 |
return -ENOMEM;
|
369 |
|
370 |
for (i = 0; i < sectors; i++) |
371 |
if (!radix_tree_add_leaf(tree, sector + i,
|
372 |
page, (i << RADIX_TREE_NODE_SHIFT))) |
373 |
goto fail;
|
374 |
|
375 |
return 0; |
376 |
|
377 |
fail:
|
378 |
page->buf = NULL;
|
379 |
radix_tree_remove_page(tree, page); |
380 |
return -ENOMEM;
|
381 |
} |
382 |
|
383 |
static void |
384 |
radix_tree_delete_branch(radix_tree_t *tree, radix_tree_node_t *node) |
385 |
{ |
386 |
int i;
|
387 |
radix_tree_link_t *link; |
388 |
|
389 |
if (!node)
|
390 |
return;
|
391 |
|
392 |
for (i = 0; i < RADIX_TREE_NODE_SIZE; i++) { |
393 |
link = node->links + i; |
394 |
|
395 |
if (radix_tree_node_contains_leaves(tree, node))
|
396 |
radix_tree_remove_page(tree, link->u.leaf.page); |
397 |
else
|
398 |
radix_tree_delete_branch(tree, link->u.next); |
399 |
|
400 |
radix_tree_clear_link(link); |
401 |
} |
402 |
|
403 |
radix_tree_free_node(tree, node); |
404 |
} |
405 |
|
406 |
static inline void |
407 |
radix_tree_destroy(radix_tree_t *tree) |
408 |
{ |
409 |
radix_tree_delete_branch(tree, tree->root); |
410 |
tree->root = NULL;
|
411 |
} |
412 |
|
413 |
/*
|
414 |
* returns 1 if @node is empty after pruning, 0 otherwise
|
415 |
*/
|
416 |
static int |
417 |
radix_tree_prune_branch(radix_tree_t *tree, |
418 |
radix_tree_node_t *node, uint32_t now) |
419 |
{ |
420 |
int i, empty;
|
421 |
radix_tree_link_t *link; |
422 |
|
423 |
empty = 1;
|
424 |
if (!node)
|
425 |
return empty;
|
426 |
|
427 |
for (i = 0; i < RADIX_TREE_NODE_SIZE; i++) { |
428 |
link = node->links + i; |
429 |
|
430 |
if (now - link->time < BLOCK_CACHE_PAGE_IDLETIME) {
|
431 |
if (radix_tree_node_contains_leaves(tree, node)) {
|
432 |
empty = 0;
|
433 |
continue;
|
434 |
} |
435 |
|
436 |
if (radix_tree_prune_branch(tree, link->u.next, now))
|
437 |
radix_tree_clear_link(link); |
438 |
else
|
439 |
empty = 0;
|
440 |
|
441 |
continue;
|
442 |
} |
443 |
|
444 |
if (radix_tree_node_contains_leaves(tree, node))
|
445 |
radix_tree_remove_page(tree, link->u.leaf.page); |
446 |
else
|
447 |
radix_tree_delete_branch(tree, link->u.next); |
448 |
|
449 |
radix_tree_clear_link(link); |
450 |
} |
451 |
|
452 |
if (empty && !radix_tree_node_is_root(tree, node))
|
453 |
radix_tree_free_node(tree, node); |
454 |
|
455 |
return empty;
|
456 |
} |
457 |
|
458 |
/*
|
459 |
* walk tree and free any node that has been idle for too long
|
460 |
*/
|
461 |
static void |
462 |
radix_tree_prune(radix_tree_t *tree) |
463 |
{ |
464 |
struct timeval now;
|
465 |
|
466 |
if (!tree->root)
|
467 |
return;
|
468 |
|
469 |
DPRINTF("tree %s has %"PRIu64" bytes\n", |
470 |
tree->cache->name, tree->size); |
471 |
|
472 |
gettimeofday(&now, NULL);
|
473 |
radix_tree_prune_branch(tree, tree->root, now.tv_sec); |
474 |
|
475 |
DPRINTF("tree %s now has %"PRIu64" bytes\n", |
476 |
tree->cache->name, tree->size); |
477 |
} |
478 |
|
479 |
static inline int |
480 |
radix_tree_initialize(radix_tree_t *tree, uint64_t sectors) |
481 |
{ |
482 |
tree->height = radix_tree_calculate_height(sectors); |
483 |
tree->root = radix_tree_allocate_node(tree, tree->height); |
484 |
if (!tree->root)
|
485 |
return -ENOMEM;
|
486 |
|
487 |
return 0; |
488 |
} |
489 |
|
490 |
static inline void |
491 |
radix_tree_free(radix_tree_t *tree) |
492 |
{ |
493 |
radix_tree_destroy(tree); |
494 |
} |
495 |
|
496 |
static void |
497 |
block_cache_prune_event(event_id_t id, char mode, void *private) |
498 |
{ |
499 |
radix_tree_t *tree; |
500 |
block_cache_t *cache; |
501 |
|
502 |
cache = (block_cache_t *)private; |
503 |
tree = &cache->tree; |
504 |
|
505 |
radix_tree_prune(tree); |
506 |
} |
507 |
|
508 |
static inline block_cache_request_t * |
509 |
block_cache_get_request(block_cache_t *cache) |
510 |
{ |
511 |
if (!cache->requests_free)
|
512 |
return NULL; |
513 |
|
514 |
return cache->request_free_list[--cache->requests_free];
|
515 |
} |
516 |
|
517 |
static inline void |
518 |
block_cache_put_request(block_cache_t *cache, block_cache_request_t *breq) |
519 |
{ |
520 |
memset(breq, 0, sizeof(block_cache_request_t)); |
521 |
cache->request_free_list[cache->requests_free++] = breq; |
522 |
} |
523 |
|
524 |
static int |
525 |
block_cache_open(td_driver_t *driver, const char *name, td_flag_t flags) |
526 |
{ |
527 |
int i, err;
|
528 |
radix_tree_t *tree; |
529 |
block_cache_t *cache; |
530 |
|
531 |
if (!td_flag_test(flags, TD_OPEN_RDONLY))
|
532 |
return -EINVAL;
|
533 |
|
534 |
if (driver->info.sector_size != RADIX_TREE_NODE_SIZE)
|
535 |
return -EINVAL;
|
536 |
|
537 |
cache = (block_cache_t *)driver->data; |
538 |
err = tapdisk_namedup(&cache->name, (char *)name);
|
539 |
if (err)
|
540 |
return -ENOMEM;
|
541 |
|
542 |
cache->sectors = driver->info.size; |
543 |
|
544 |
tree = &cache->tree; |
545 |
err = radix_tree_initialize(tree, cache->sectors); |
546 |
if (err)
|
547 |
goto fail;
|
548 |
|
549 |
tree->cache = cache; |
550 |
cache->requests_free = BLOCK_CACHE_REQUESTS; |
551 |
for (i = 0; i < BLOCK_CACHE_REQUESTS; i++) |
552 |
cache->request_free_list[i] = cache->requests + i; |
553 |
|
554 |
cache->timeout_id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, |
555 |
-1, /* dummy fd */ |
556 |
BLOCK_CACHE_PAGE_IDLETIME << 1,
|
557 |
block_cache_prune_event, |
558 |
cache); |
559 |
if (cache->timeout_id < 0) |
560 |
goto fail;
|
561 |
|
562 |
DPRINTF("opening cache for %s, sectors: %"PRIu64", " |
563 |
"tree: %p, height: %d\n",
|
564 |
cache->name, cache->sectors, tree, tree->height); |
565 |
|
566 |
if (mlockall(MCL_CURRENT | MCL_FUTURE))
|
567 |
DPRINTF("mlockall failed: %d\n", -errno);
|
568 |
|
569 |
return 0; |
570 |
|
571 |
fail:
|
572 |
free(cache->name); |
573 |
radix_tree_free(&cache->tree); |
574 |
return err;
|
575 |
} |
576 |
|
577 |
static int |
578 |
block_cache_close(td_driver_t *driver) |
579 |
{ |
580 |
radix_tree_t *tree; |
581 |
block_cache_t *cache; |
582 |
|
583 |
cache = (block_cache_t *)driver->data; |
584 |
tree = &cache->tree; |
585 |
|
586 |
DPRINTF("closing cache for %s\n", cache->name);
|
587 |
|
588 |
tapdisk_server_unregister_event(cache->timeout_id); |
589 |
radix_tree_free(tree); |
590 |
free(cache->name); |
591 |
|
592 |
return 0; |
593 |
} |
594 |
|
595 |
static inline uint64_t |
596 |
block_cache_hash(block_cache_t *cache, char *buf)
|
597 |
{ |
598 |
int i, n;
|
599 |
uint64_t cksm, *data; |
600 |
|
601 |
return 0; |
602 |
|
603 |
cksm = 0;
|
604 |
data = (uint64_t *)buf; |
605 |
n = RADIX_TREE_NODE_SIZE / sizeof(uint64_t);
|
606 |
|
607 |
for (i = 0; i < n; i++) |
608 |
cksm += data[i]; |
609 |
|
610 |
return ~cksm;
|
611 |
} |
612 |
|
613 |
static void |
614 |
block_cache_hit(block_cache_t *cache, td_request_t treq, char *iov[])
|
615 |
{ |
616 |
int i;
|
617 |
off_t off; |
618 |
|
619 |
cache->stats.hits += treq.secs; |
620 |
|
621 |
for (i = 0; i < treq.secs; i++) { |
622 |
DBG("%s: block cache hit: sec 0x%08llx, hash: 0x%08llx\n",
|
623 |
cache->name, treq.sec + i, block_cache_hash(cache, iov[i])); |
624 |
|
625 |
off = i << RADIX_TREE_NODE_SHIFT; |
626 |
memcpy(treq.buf + off, iov[i], RADIX_TREE_NODE_SIZE); |
627 |
} |
628 |
|
629 |
td_complete_request(treq, 0);
|
630 |
} |
631 |
|
632 |
static void |
633 |
block_cache_populate_cache(td_request_t clone, int err)
|
634 |
{ |
635 |
int i;
|
636 |
radix_tree_t *tree; |
637 |
block_cache_t *cache; |
638 |
block_cache_request_t *breq; |
639 |
|
640 |
breq = (block_cache_request_t *)clone.cb_data; |
641 |
cache = breq->cache; |
642 |
tree = &cache->tree; |
643 |
breq->secs -= clone.secs; |
644 |
breq->err = (breq->err ? breq->err : err); |
645 |
|
646 |
if (breq->secs)
|
647 |
return;
|
648 |
|
649 |
if (breq->err) {
|
650 |
free(breq->buf); |
651 |
goto out;
|
652 |
} |
653 |
|
654 |
for (i = 0; i < breq->treq.secs; i++) { |
655 |
off_t off = i << RADIX_TREE_NODE_SHIFT; |
656 |
DBG("%s: populating sec 0x%08llx\n",
|
657 |
cache->name, breq->treq.sec + i); |
658 |
memcpy(breq->treq.buf + off, |
659 |
breq->buf + off, RADIX_TREE_NODE_SIZE); |
660 |
} |
661 |
|
662 |
if (radix_tree_add_leaves(tree, breq->buf,
|
663 |
breq->treq.sec, breq->treq.secs)) |
664 |
free(breq->buf); |
665 |
|
666 |
out:
|
667 |
td_complete_request(breq->treq, breq->err); |
668 |
block_cache_put_request(cache, breq); |
669 |
} |
670 |
|
671 |
static void |
672 |
block_cache_miss(block_cache_t *cache, td_request_t treq) |
673 |
{ |
674 |
void *buf;
|
675 |
size_t size; |
676 |
td_request_t clone; |
677 |
radix_tree_t *tree; |
678 |
block_cache_request_t *breq; |
679 |
|
680 |
DBG("%s: block cache miss: sec 0x%08llx\n", cache->name, treq.sec);
|
681 |
|
682 |
clone = treq; |
683 |
tree = &cache->tree; |
684 |
size = treq.secs << RADIX_TREE_NODE_SHIFT; |
685 |
|
686 |
cache->stats.misses += treq.secs; |
687 |
|
688 |
if (radix_tree_size(tree) + size >= BLOCK_CACHE_MAX_SIZE)
|
689 |
goto out;
|
690 |
|
691 |
breq = block_cache_get_request(cache); |
692 |
if (!breq)
|
693 |
goto out;
|
694 |
|
695 |
if (posix_memalign(&buf, RADIX_TREE_NODE_SIZE, size)) {
|
696 |
block_cache_put_request(cache, breq); |
697 |
goto out;
|
698 |
} |
699 |
|
700 |
breq->treq = treq; |
701 |
breq->secs = treq.secs; |
702 |
breq->err = 0;
|
703 |
breq->buf = buf; |
704 |
breq->cache = cache; |
705 |
|
706 |
clone.buf = buf; |
707 |
clone.cb = block_cache_populate_cache; |
708 |
clone.cb_data = breq; |
709 |
|
710 |
out:
|
711 |
td_forward_request(clone); |
712 |
} |
713 |
|
714 |
static void |
715 |
block_cache_queue_read(td_driver_t *driver, td_request_t treq) |
716 |
{ |
717 |
int i;
|
718 |
radix_tree_t *tree; |
719 |
block_cache_t *cache; |
720 |
char *iov[BLOCK_CACHE_NODES_PER_PAGE];
|
721 |
|
722 |
cache = (block_cache_t *)driver->data; |
723 |
tree = &cache->tree; |
724 |
|
725 |
cache->stats.reads += treq.secs; |
726 |
|
727 |
if (treq.secs > BLOCK_CACHE_NODES_PER_PAGE)
|
728 |
return td_forward_request(treq);
|
729 |
|
730 |
for (i = 0; i < treq.secs; i++) { |
731 |
iov[i] = radix_tree_find_leaf(tree, treq.sec + i); |
732 |
if (!iov[i])
|
733 |
return block_cache_miss(cache, treq);
|
734 |
} |
735 |
|
736 |
return block_cache_hit(cache, treq, iov);
|
737 |
} |
738 |
|
739 |
static void |
740 |
block_cache_queue_write(td_driver_t *driver, td_request_t treq) |
741 |
{ |
742 |
td_complete_request(treq, -EPERM); |
743 |
} |
744 |
|
745 |
static int |
746 |
block_cache_get_parent_id(td_driver_t *driver, td_disk_id_t *id) |
747 |
{ |
748 |
return -EINVAL;
|
749 |
} |
750 |
|
751 |
static int |
752 |
block_cache_validate_parent(td_driver_t *driver, |
753 |
td_driver_t *pdriver, td_flag_t flags) |
754 |
{ |
755 |
if (!td_flag_test(pdriver->state, TD_DRIVER_RDONLY))
|
756 |
return -EINVAL;
|
757 |
|
758 |
if (strcmp(driver->name, pdriver->name))
|
759 |
return -EINVAL;
|
760 |
|
761 |
return 0; |
762 |
} |
763 |
|
764 |
static void |
765 |
block_cache_debug(td_driver_t *driver) |
766 |
{ |
767 |
block_cache_t *cache; |
768 |
block_cache_stats_t *stats; |
769 |
|
770 |
cache = (block_cache_t *)driver->data; |
771 |
stats = &cache->stats; |
772 |
|
773 |
WARN("BLOCK CACHE %s\n", cache->name);
|
774 |
WARN("reads: %"PRIu64", hits: %"PRIu64", " |
775 |
"misses: %"PRIu64", prunes: %"PRIu64"\n", |
776 |
stats->reads, stats->hits, stats->misses, stats->prunes); |
777 |
} |
778 |
|
779 |
struct tap_disk tapdisk_block_cache = {
|
780 |
.disk_type = "tapdisk_block_cache",
|
781 |
.flags = 0,
|
782 |
.private_data_size = sizeof(block_cache_t),
|
783 |
.td_open = block_cache_open, |
784 |
.td_close = block_cache_close, |
785 |
.td_queue_read = block_cache_queue_read, |
786 |
.td_queue_write = block_cache_queue_write, |
787 |
.td_get_parent_id = block_cache_get_parent_id, |
788 |
.td_validate_parent = block_cache_validate_parent, |
789 |
.td_debug = block_cache_debug, |
790 |
}; |