Statistics
| Branch: | Revision:

root / drivers / block-lcache.c @ abdb293f

History | View | Annotate | Download (8.6 kB)

1
/*
2
 * Copyright (c) 2010, XenSource Inc.
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *     * Redistributions of source code must retain the above copyright
8
 *       notice, this list of conditions and the following disclaimer.
9
 *     * Redistributions in binary form must reproduce the above copyright
10
 *       notice, this list of conditions and the following disclaimer in the
11
 *       documentation and/or other materials provided with the distribution.
12
 *     * Neither the name of XenSource Inc. nor the names of its contributors
13
 *       may be used to endorse or promote products derived from this software
14
 *       without specific prior written permission.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
28

    
29
/* Local persistent cache: write any sectors not found in the leaf back to the 
30
 * leaf.
31
 */
32

    
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36

    
37
#include <errno.h>
38
#include <fcntl.h>
39
#include <unistd.h>
40
#include <stdlib.h>
41
#include <limits.h>
42
#include <sys/mman.h>
43
#include <sys/vfs.h>
44

    
45
#include "vhd.h"
46
#include "tapdisk.h"
47
#include "tapdisk-utils.h"
48
#include "tapdisk-driver.h"
49
#include "tapdisk-server.h"
50
#include "tapdisk-interface.h"
51

    
52
#define DEBUG 1

/* Debug tracing: expands to a no-op unless DEBUG is defined. */
#ifdef DEBUG
#define DBG(_f, _a...) tlog_write(TLOG_DBG, _f, ##_a)
#else
#define DBG(_f, _a...) ((void)0)
#endif

/* Syslog helpers. WARN appends the calling function name and line number. */
#define WARN(_f, _a...) tlog_syslog(TLOG_WARN, "WARNING: " _f "in %s:%d", \
                                    ##_a, __func__, __LINE__)
#define INFO(_f, _a...) tlog_syslog(TLOG_INFO, _f, ##_a)

/*
 * Assertion helpers. BUG_ON and WARN_ON are wrapped in do { } while (0)
 * so that each behaves as a single statement, including when used as the
 * body of an if that has an else branch.
 */
#define BUG()           td_panic()
#define BUG_ON(_cond)   do { if (unlikely(_cond)) { td_panic(); } } while (0)
/* Logs the text of the failed condition; WARN needs a string literal. */
#define WARN_ON(_p)     do { if (unlikely(_p)) { WARN("%s ", #_p); } } while (0)

/* NB. evaluates the selected argument twice; avoid side effects. */
#define MIN(a, b)       ((a) < (b) ? (a) : (b))

/* Number of request descriptors kept per cache instance. */
#define TD_LCACHE_MAX_REQ               (MAX_REQUESTS*2)
/* Size in bytes of the buffer mapped for each request descriptor. */
#define TD_LCACHE_BUFSZ                 (MAX_SEGMENTS_PER_REQ * \
                                         sysconf(_SC_PAGE_SIZE))
71

    
72

    
73
typedef struct lcache                   td_lcache_t;
74
typedef struct lcache_request           td_lcache_req_t;
75

    
76
struct lcache_request {
77
        char                           *buf;
78
        int                             err;
79

    
80
        td_request_t                    treq;
81
        int                             secs;
82

    
83
        td_vbd_request_t                vreq;
84
        struct td_iovec                 iov;
85

    
86
        td_lcache_t                    *cache;
87
};
88

    
89
struct lcache {
90
        char                           *name;
91

    
92
        td_lcache_req_t                 reqv[TD_LCACHE_MAX_REQ];
93
        td_lcache_req_t                *free[TD_LCACHE_MAX_REQ];
94
        int                             n_free;
95

    
96
        char                           *buf;
97
        size_t                          bufsz;
98

    
99
        int                             wr_en;
100
        struct timeval                  ts;
101
};
102

    
103
static td_lcache_req_t *
104
lcache_alloc_request(td_lcache_t *cache)
105
{
106
        td_lcache_req_t *req = NULL;
107

    
108
        if (likely(cache->n_free))
109
                req = cache->free[--cache->n_free];
110

    
111
        return req;
112
}
113

    
114
static void
115
lcache_free_request(td_lcache_t *cache, td_lcache_req_t *req)
116
{
117
        BUG_ON(cache->n_free >= TD_LCACHE_MAX_REQ);
118
        cache->free[cache->n_free++] = req;
119
}
120

    
121
static void
122
lcache_destroy_buffers(td_lcache_t *cache)
123
{
124
        td_lcache_req_t *req;
125

    
126
        do {
127
                req = lcache_alloc_request(cache);
128
                if (req)
129
                        munmap(req->buf, TD_LCACHE_BUFSZ);
130
        } while (req);
131
}
132

    
133
static int
134
lcache_create_buffers(td_lcache_t *cache)
135
{
136
        int prot, flags, i, err;
137

    
138
        prot  = PROT_READ|PROT_WRITE;
139
        flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_LOCKED;
140

    
141
        cache->n_free = 0;
142

    
143
        for (i = 0; i < TD_LCACHE_MAX_REQ; i++) {
144
                td_lcache_req_t *req = &cache->reqv[i];
145

    
146
                req->buf = mmap(NULL, TD_LCACHE_BUFSZ, prot, flags, -1, 0);
147
                if (req->buf == MAP_FAILED) {
148
                        req->buf = NULL;
149
                        err = -errno;
150
                        goto fail;
151
                }
152

    
153
                lcache_free_request(cache, req);
154
        }
155

    
156
        return 0;
157

    
158
fail:
159
        EPRINTF("Buffer init failure: %d", err);
160
        lcache_destroy_buffers(cache);
161
        return err;
162
}
163

    
164
static int
165
lcache_close(td_driver_t *driver)
166
{
167
        td_lcache_t *cache = driver->data;
168

    
169
        lcache_destroy_buffers(cache);
170

    
171
        free(cache->name);
172

    
173
        return 0;
174
}
175

    
176
static int
177
lcache_open(td_driver_t *driver, const char *name, td_flag_t flags)
178
{
179
        td_lcache_t *cache = driver->data;
180
        int err;
181

    
182
        err  = tapdisk_namedup(&cache->name, (char *)name);
183
        if (err)
184
                goto fail;
185

    
186
        err = lcache_create_buffers(cache);
187
        if (err)
188
                goto fail;
189

    
190
        timerclear(&cache->ts);
191
        cache->wr_en = 1;
192

    
193
        return 0;
194

    
195
fail:
196
        lcache_close(driver);
197
        return err;
198
}
199

    
200
/*
201
 * NB. lcache->{wr_en,ts}: test free space in the caching SR before
202
 * attempting to store our reads. VHD block allocation writes on Ext3
203
 * have the nasty property of blocking excessively after running out
204
 * of space. We therefore enable/disable ourselves at a 1/s
205
 * granularity, querying free space through statfs beforehand.
206
 */
207

    
208
static long
209
lcache_fs_bfree(const td_lcache_t *cache, long *bsize)
210
{
211
        struct statfs fst;
212
        int err;
213

    
214
        err = statfs(cache->name, &fst);
215
        if (err)
216
                return err;
217

    
218
        if (likely(bsize))
219
                *bsize = fst.f_bsize;
220

    
221
        return MIN(fst.f_bfree, LONG_MAX);
222
}
223

    
224
static int
225
__lcache_wr_enabled(const td_lcache_t *cache)
226
{
227
        long threshold = 2<<20; /* B */
228
        long bfree, bsz = 1;
229
        int enable;
230

    
231
        bfree  = lcache_fs_bfree(cache, &bsz);
232
        enable = bfree > threshold / bsz;
233

    
234
        return enable;
235
}
236

    
237
static int
238
lcache_wr_enabled(td_lcache_t *cache)
239
{
240
        const int timeout = 1; /* s */
241
        struct timeval now, delta;
242

    
243
        gettimeofday(&now, NULL);
244
        timersub(&now, &cache->ts, &delta);
245

    
246
        if (delta.tv_sec >= timeout) {
247
                cache->wr_en = __lcache_wr_enabled(cache);
248
                cache->ts    = now;
249
        }
250

    
251
        return cache->wr_en;
252
}
253

    
254
static void
255
__lcache_write_cb(td_vbd_request_t *vreq, int error,
256
                  void *token, int final)
257
{
258
        td_lcache_req_t *req = containerof(vreq, td_lcache_req_t, vreq);
259
        td_lcache_t *cache = token;
260

    
261
        if (error == -ENOSPC)
262
                cache->wr_en = 0;
263

    
264
        lcache_free_request(cache, req);
265
}
266

    
267
static void
268
lcache_store_read(td_lcache_t *cache, td_lcache_req_t *req)
269
{
270
        td_vbd_request_t *vreq;
271
        struct td_iovec *iov;
272
        td_vbd_t *vbd;
273
        int err;
274

    
275
        iov          = &req->iov;
276
        iov->base    = req->buf;
277
        iov->secs    = req->treq.secs;
278

    
279
        vreq         = &req->vreq;
280
        vreq->op     = TD_OP_WRITE;
281
        vreq->sec    = req->treq.sec;
282
        vreq->iov    = iov;
283
        vreq->iovcnt = 1;
284
        vreq->cb     = __lcache_write_cb;
285
        vreq->token  = cache;
286

    
287
        vbd = req->treq.vreq->vbd;
288

    
289
        err = tapdisk_vbd_queue_request(vbd, vreq);
290
        BUG_ON(err);
291
}
292

    
293
static void
294
lcache_complete_read(td_lcache_t *cache, td_lcache_req_t *req)
295
{
296
        if (likely(!req->err)) {
297
                size_t sz = req->treq.secs << SECTOR_SHIFT;
298
                memcpy(req->treq.buf, req->buf, sz);
299
        }
300

    
301
        td_complete_request(req->treq, req->err);
302

    
303
        if (unlikely(req->err) || !lcache_wr_enabled(cache)) {
304
                lcache_free_request(cache, req);
305
                return;
306
        }
307

    
308
        lcache_store_read(cache, req);
309
}
310

    
311
static void
312
__lcache_read_cb(td_request_t treq, int err)
313
{
314
        td_lcache_req_t *req = treq.cb_data;
315
        td_lcache_t *cache = req->cache;
316

    
317
        BUG_ON(req->secs < treq.secs);
318
        req->secs -= treq.secs;
319
        req->err   = req->err ? : err;
320

    
321
        if (!req->secs)
322
                lcache_complete_read(cache, req);
323
}
324

    
325
static void
326
lcache_queue_read(td_driver_t *driver, td_request_t treq)
327
{
328
        td_lcache_t *cache = driver->data;
329
        td_request_t clone;
330
        td_lcache_req_t *req;
331

    
332
        req = lcache_alloc_request(cache);
333
        if (!req) {
334
                td_complete_request(treq, -EBUSY);
335
                return;
336
        }
337

    
338
        req->treq    = treq;
339
        req->cache   = cache;
340

    
341
        req->secs    = req->treq.secs;
342
        req->err     = 0;
343

    
344
        clone         = treq;
345
        clone.buf     = req->buf;
346
        clone.cb      = __lcache_read_cb;
347
        clone.cb_data = req;
348

    
349
        td_forward_request(clone);
350
}
351

    
352
static int
353
lcache_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
354
{
355
        return -EINVAL;
356
}
357

    
358
static int
359
lcache_validate_parent(td_driver_t *driver,
360
                       td_driver_t *pdriver, td_flag_t flags)
361
{
362
        if (strcmp(driver->name, pdriver->name))
363
                return -EINVAL;
364

    
365
        return 0;
366
}
367

    
368
struct tap_disk tapdisk_lcache = {
369
        .disk_type                  = "tapdisk_lcache",
370
        .flags                      = 0,
371
        .private_data_size          = sizeof(td_lcache_t),
372
        .td_open                    = lcache_open,
373
        .td_close                   = lcache_close,
374
        .td_queue_read              = lcache_queue_read,
375
        .td_get_parent_id           = lcache_get_parent_id,
376
        .td_validate_parent         = lcache_validate_parent,
377
};