root / drivers / block-lcache.c @ abdb293f
History | View | Annotate | Download (8.6 kB)
1 |
/*
 * Copyright (c) 2010, XenSource Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of XenSource Inc. nor the names of its contributors
 *       may be used to endorse or promote products derived from this software
 *       without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
28 |
|
29 |
/* Local persistent cache: write any sectors not found in the leaf back to the
 * leaf.
 */
|
32 |
|
33 |
#ifdef HAVE_CONFIG_H
|
34 |
#include "config.h" |
35 |
#endif
|
36 |
|
37 |
#include <errno.h> |
38 |
#include <fcntl.h> |
39 |
#include <unistd.h> |
40 |
#include <stdlib.h> |
41 |
#include <limits.h> |
42 |
#include <sys/mman.h> |
43 |
#include <sys/vfs.h> |
44 |
|
45 |
#include "vhd.h" |
46 |
#include "tapdisk.h" |
47 |
#include "tapdisk-utils.h" |
48 |
#include "tapdisk-driver.h" |
49 |
#include "tapdisk-server.h" |
50 |
#include "tapdisk-interface.h" |
51 |
|
52 |
#define DEBUG 1

/* Logging helpers. DBG compiles to a no-op unless DEBUG is defined. */
#ifdef DEBUG
#define DBG(_f, _a...)       tlog_write(TLOG_DBG, _f, ##_a)
#else
#define DBG(_f, _a...)       ((void)0)
#endif
#define WARN(_f, _a...)      tlog_syslog(TLOG_WARN, "WARNING: "_f "in %s:%d", \
					 ##_a, __func__, __LINE__)
#define INFO(_f, _a...)      tlog_syslog(TLOG_INFO, _f, ##_a)
#define BUG()                td_panic()

/*
 * Panic when _cond holds. Wrapped in do/while(0) so the macro behaves as a
 * single statement and cannot capture a following `else`.
 */
#define BUG_ON(_cond)						\
	do {							\
		if (unlikely(_cond)) {				\
			td_panic();				\
		}						\
	} while (0)

/*
 * Log a warning naming the condition when _p holds. WARN() requires a
 * string-literal format, so the stringified condition is passed as a "%s"
 * argument rather than as the format itself.
 */
#define WARN_ON(_p)						\
	do {							\
		if (unlikely(_p)) {				\
			WARN("%s ", #_p);			\
		}						\
	} while (0)

/* NB. evaluates its arguments more than once. */
#define MIN(a, b)            ((a) < (b) ? (a) : (b))

/* Size of the request pool. */
#define TD_LCACHE_MAX_REQ    (MAX_REQUESTS*2)
/* Size in bytes of each per-request buffer; queries the page size on use. */
#define TD_LCACHE_BUFSZ      (MAX_SEGMENTS_PER_REQ * \
			      sysconf(_SC_PAGE_SIZE))
71 |
|
72 |
|
73 |
typedef struct lcache td_lcache_t; |
74 |
typedef struct lcache_request td_lcache_req_t; |
75 |
|
76 |
struct lcache_request {
|
77 |
char *buf;
|
78 |
int err;
|
79 |
|
80 |
td_request_t treq; |
81 |
int secs;
|
82 |
|
83 |
td_vbd_request_t vreq; |
84 |
struct td_iovec iov;
|
85 |
|
86 |
td_lcache_t *cache; |
87 |
}; |
88 |
|
89 |
struct lcache {
|
90 |
char *name;
|
91 |
|
92 |
td_lcache_req_t reqv[TD_LCACHE_MAX_REQ]; |
93 |
td_lcache_req_t *free[TD_LCACHE_MAX_REQ]; |
94 |
int n_free;
|
95 |
|
96 |
char *buf;
|
97 |
size_t bufsz; |
98 |
|
99 |
int wr_en;
|
100 |
struct timeval ts;
|
101 |
}; |
102 |
|
103 |
static td_lcache_req_t *
|
104 |
lcache_alloc_request(td_lcache_t *cache) |
105 |
{ |
106 |
td_lcache_req_t *req = NULL;
|
107 |
|
108 |
if (likely(cache->n_free))
|
109 |
req = cache->free[--cache->n_free]; |
110 |
|
111 |
return req;
|
112 |
} |
113 |
|
114 |
static void |
115 |
lcache_free_request(td_lcache_t *cache, td_lcache_req_t *req) |
116 |
{ |
117 |
BUG_ON(cache->n_free >= TD_LCACHE_MAX_REQ); |
118 |
cache->free[cache->n_free++] = req; |
119 |
} |
120 |
|
121 |
static void |
122 |
lcache_destroy_buffers(td_lcache_t *cache) |
123 |
{ |
124 |
td_lcache_req_t *req; |
125 |
|
126 |
do {
|
127 |
req = lcache_alloc_request(cache); |
128 |
if (req)
|
129 |
munmap(req->buf, TD_LCACHE_BUFSZ); |
130 |
} while (req);
|
131 |
} |
132 |
|
133 |
static int |
134 |
lcache_create_buffers(td_lcache_t *cache) |
135 |
{ |
136 |
int prot, flags, i, err;
|
137 |
|
138 |
prot = PROT_READ|PROT_WRITE; |
139 |
flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_LOCKED; |
140 |
|
141 |
cache->n_free = 0;
|
142 |
|
143 |
for (i = 0; i < TD_LCACHE_MAX_REQ; i++) { |
144 |
td_lcache_req_t *req = &cache->reqv[i]; |
145 |
|
146 |
req->buf = mmap(NULL, TD_LCACHE_BUFSZ, prot, flags, -1, 0); |
147 |
if (req->buf == MAP_FAILED) {
|
148 |
req->buf = NULL;
|
149 |
err = -errno; |
150 |
goto fail;
|
151 |
} |
152 |
|
153 |
lcache_free_request(cache, req); |
154 |
} |
155 |
|
156 |
return 0; |
157 |
|
158 |
fail:
|
159 |
EPRINTF("Buffer init failure: %d", err);
|
160 |
lcache_destroy_buffers(cache); |
161 |
return err;
|
162 |
} |
163 |
|
164 |
static int |
165 |
lcache_close(td_driver_t *driver) |
166 |
{ |
167 |
td_lcache_t *cache = driver->data; |
168 |
|
169 |
lcache_destroy_buffers(cache); |
170 |
|
171 |
free(cache->name); |
172 |
|
173 |
return 0; |
174 |
} |
175 |
|
176 |
static int |
177 |
lcache_open(td_driver_t *driver, const char *name, td_flag_t flags) |
178 |
{ |
179 |
td_lcache_t *cache = driver->data; |
180 |
int err;
|
181 |
|
182 |
err = tapdisk_namedup(&cache->name, (char *)name);
|
183 |
if (err)
|
184 |
goto fail;
|
185 |
|
186 |
err = lcache_create_buffers(cache); |
187 |
if (err)
|
188 |
goto fail;
|
189 |
|
190 |
timerclear(&cache->ts); |
191 |
cache->wr_en = 1;
|
192 |
|
193 |
return 0; |
194 |
|
195 |
fail:
|
196 |
lcache_close(driver); |
197 |
return err;
|
198 |
} |
199 |
|
200 |
/*
 * NB. lcache->{wr_en,ts}: test free space in the caching SR before
 * attempting to store our reads. VHD block allocation writes on Ext3
 * have the nasty property of blocking excessively after running out
 * of space. We therefore enable/disable ourselves at a 1/s
 * granularity, querying free space through statfs beforehand.
 */
|
207 |
|
208 |
static long |
209 |
lcache_fs_bfree(const td_lcache_t *cache, long *bsize) |
210 |
{ |
211 |
struct statfs fst;
|
212 |
int err;
|
213 |
|
214 |
err = statfs(cache->name, &fst); |
215 |
if (err)
|
216 |
return err;
|
217 |
|
218 |
if (likely(bsize))
|
219 |
*bsize = fst.f_bsize; |
220 |
|
221 |
return MIN(fst.f_bfree, LONG_MAX);
|
222 |
} |
223 |
|
224 |
static int |
225 |
__lcache_wr_enabled(const td_lcache_t *cache)
|
226 |
{ |
227 |
long threshold = 2<<20; /* B */ |
228 |
long bfree, bsz = 1; |
229 |
int enable;
|
230 |
|
231 |
bfree = lcache_fs_bfree(cache, &bsz); |
232 |
enable = bfree > threshold / bsz; |
233 |
|
234 |
return enable;
|
235 |
} |
236 |
|
237 |
static int |
238 |
lcache_wr_enabled(td_lcache_t *cache) |
239 |
{ |
240 |
const int timeout = 1; /* s */ |
241 |
struct timeval now, delta;
|
242 |
|
243 |
gettimeofday(&now, NULL);
|
244 |
timersub(&now, &cache->ts, &delta); |
245 |
|
246 |
if (delta.tv_sec >= timeout) {
|
247 |
cache->wr_en = __lcache_wr_enabled(cache); |
248 |
cache->ts = now; |
249 |
} |
250 |
|
251 |
return cache->wr_en;
|
252 |
} |
253 |
|
254 |
static void |
255 |
__lcache_write_cb(td_vbd_request_t *vreq, int error,
|
256 |
void *token, int final) |
257 |
{ |
258 |
td_lcache_req_t *req = containerof(vreq, td_lcache_req_t, vreq); |
259 |
td_lcache_t *cache = token; |
260 |
|
261 |
if (error == -ENOSPC)
|
262 |
cache->wr_en = 0;
|
263 |
|
264 |
lcache_free_request(cache, req); |
265 |
} |
266 |
|
267 |
static void |
268 |
lcache_store_read(td_lcache_t *cache, td_lcache_req_t *req) |
269 |
{ |
270 |
td_vbd_request_t *vreq; |
271 |
struct td_iovec *iov;
|
272 |
td_vbd_t *vbd; |
273 |
int err;
|
274 |
|
275 |
iov = &req->iov; |
276 |
iov->base = req->buf; |
277 |
iov->secs = req->treq.secs; |
278 |
|
279 |
vreq = &req->vreq; |
280 |
vreq->op = TD_OP_WRITE; |
281 |
vreq->sec = req->treq.sec; |
282 |
vreq->iov = iov; |
283 |
vreq->iovcnt = 1;
|
284 |
vreq->cb = __lcache_write_cb; |
285 |
vreq->token = cache; |
286 |
|
287 |
vbd = req->treq.vreq->vbd; |
288 |
|
289 |
err = tapdisk_vbd_queue_request(vbd, vreq); |
290 |
BUG_ON(err); |
291 |
} |
292 |
|
293 |
static void |
294 |
lcache_complete_read(td_lcache_t *cache, td_lcache_req_t *req) |
295 |
{ |
296 |
if (likely(!req->err)) {
|
297 |
size_t sz = req->treq.secs << SECTOR_SHIFT; |
298 |
memcpy(req->treq.buf, req->buf, sz); |
299 |
} |
300 |
|
301 |
td_complete_request(req->treq, req->err); |
302 |
|
303 |
if (unlikely(req->err) || !lcache_wr_enabled(cache)) {
|
304 |
lcache_free_request(cache, req); |
305 |
return;
|
306 |
} |
307 |
|
308 |
lcache_store_read(cache, req); |
309 |
} |
310 |
|
311 |
static void |
312 |
__lcache_read_cb(td_request_t treq, int err)
|
313 |
{ |
314 |
td_lcache_req_t *req = treq.cb_data; |
315 |
td_lcache_t *cache = req->cache; |
316 |
|
317 |
BUG_ON(req->secs < treq.secs); |
318 |
req->secs -= treq.secs; |
319 |
req->err = req->err ? : err; |
320 |
|
321 |
if (!req->secs)
|
322 |
lcache_complete_read(cache, req); |
323 |
} |
324 |
|
325 |
static void |
326 |
lcache_queue_read(td_driver_t *driver, td_request_t treq) |
327 |
{ |
328 |
td_lcache_t *cache = driver->data; |
329 |
td_request_t clone; |
330 |
td_lcache_req_t *req; |
331 |
|
332 |
req = lcache_alloc_request(cache); |
333 |
if (!req) {
|
334 |
td_complete_request(treq, -EBUSY); |
335 |
return;
|
336 |
} |
337 |
|
338 |
req->treq = treq; |
339 |
req->cache = cache; |
340 |
|
341 |
req->secs = req->treq.secs; |
342 |
req->err = 0;
|
343 |
|
344 |
clone = treq; |
345 |
clone.buf = req->buf; |
346 |
clone.cb = __lcache_read_cb; |
347 |
clone.cb_data = req; |
348 |
|
349 |
td_forward_request(clone); |
350 |
} |
351 |
|
352 |
static int |
353 |
lcache_get_parent_id(td_driver_t *driver, td_disk_id_t *id) |
354 |
{ |
355 |
return -EINVAL;
|
356 |
} |
357 |
|
358 |
static int |
359 |
lcache_validate_parent(td_driver_t *driver, |
360 |
td_driver_t *pdriver, td_flag_t flags) |
361 |
{ |
362 |
if (strcmp(driver->name, pdriver->name))
|
363 |
return -EINVAL;
|
364 |
|
365 |
return 0; |
366 |
} |
367 |
|
368 |
struct tap_disk tapdisk_lcache = {
|
369 |
.disk_type = "tapdisk_lcache",
|
370 |
.flags = 0,
|
371 |
.private_data_size = sizeof(td_lcache_t),
|
372 |
.td_open = lcache_open, |
373 |
.td_close = lcache_close, |
374 |
.td_queue_read = lcache_queue_read, |
375 |
.td_get_parent_id = lcache_get_parent_id, |
376 |
.td_validate_parent = lcache_validate_parent, |
377 |
}; |