root / drivers / tapdisk-vbd.c @ abdb293f
History | View | Annotate | Download (29.6 kB)
1 |
/*
|
---|---|
2 |
* Copyright (c) 2008, XenSource Inc.
|
3 |
* Copyright (c) 2010, Citrix Systems, Inc.
|
4 |
*
|
5 |
* All rights reserved.
|
6 |
*
|
7 |
* Redistribution and use in source and binary forms, with or without
|
8 |
* modification, are permitted provided that the following conditions are met:
|
9 |
* * Redistributions of source code must retain the above copyright
|
10 |
* notice, this list of conditions and the following disclaimer.
|
11 |
* * Redistributions in binary form must reproduce the above copyright
|
12 |
* notice, this list of conditions and the following disclaimer in the
|
13 |
* documentation and/or other materials provided with the distribution.
|
14 |
* * Neither the name of XenSource Inc. nor the names of its contributors
|
15 |
* may be used to endorse or promote products derived from this software
|
16 |
* without specific prior written permission.
|
17 |
*
|
18 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19 |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
22 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
23 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
24 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
25 |
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
26 |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27 |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28 |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29 |
*/
|
30 |
|
31 |
#ifdef HAVE_CONFIG_H
|
32 |
#include "config.h" |
33 |
#endif
|
34 |
|
35 |
#include <stdio.h> |
36 |
#include <errno.h> |
37 |
#include <fcntl.h> |
38 |
#include <regex.h> |
39 |
#include <unistd.h> |
40 |
#include <stdlib.h> |
41 |
#include <libgen.h> |
42 |
#include <sys/mman.h> |
43 |
#include <sys/ioctl.h> |
44 |
|
45 |
#include "libvhd.h" |
46 |
#include "tapdisk-blktap.h" |
47 |
#include "tapdisk-image.h" |
48 |
#include "tapdisk-driver.h" |
49 |
#include "tapdisk-server.h" |
50 |
#include "tapdisk-vbd.h" |
51 |
#include "tapdisk-disktype.h" |
52 |
#include "tapdisk-interface.h" |
53 |
#include "tapdisk-stats.h" |
54 |
#include "tapdisk-storage.h" |
55 |
|
56 |
/* Logging shorthands: hand a printf-style message to the tlog facility. */
#define DBG(_level, _f, ...)	tlog_write(_level, _f, ##__VA_ARGS__)
#define ERR(_err, _f, ...)	tlog_error(_err, _f, ##__VA_ARGS__)
58 |
|
59 |
#if 1
/*
 * ASSERT(p): when p is false, log the failed expression together with its
 * source location and terminate the process.
 *
 * abort() replaces the former store through a null pointer: that store is
 * undefined behaviour, so the compiler was free to remove it and let
 * execution continue past a failed assertion.
 */
#define ASSERT(p)							\
	do {								\
		if (!(p)) {						\
			DPRINTF("Assertion '%s' failed, line %d, "	\
				"file %s", #p, __LINE__, __FILE__);	\
			abort();					\
		}							\
	} while (0)
#else
#define ASSERT(p) ((void)0)
#endif
|
71 |
|
72 |
/* Tunables: VDI reopen retry policy and the pending-request watchdog. */
#define TD_VBD_EIO_RETRIES         10	/* open attempts made by tapdisk_vbd_resume() */
#define TD_VBD_EIO_SLEEP           1	/* seconds slept after each failed attempt */
#define TD_VBD_WATCHDOG_TIMEOUT    10	/* seconds without progress before warning */
75 |
|
76 |
/* Forward declarations for static helpers defined further down this file. */
static void tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd,
					     td_vbd_request_t *vreq);
static int tapdisk_vbd_queue_ready(td_vbd_t *vbd);
static void tapdisk_vbd_check_queue_state(td_vbd_t *vbd);
79 |
|
80 |
/*
|
81 |
* initialization
|
82 |
*/
|
83 |
|
84 |
static void |
85 |
tapdisk_vbd_mark_progress(td_vbd_t *vbd) |
86 |
{ |
87 |
gettimeofday(&vbd->ts, NULL);
|
88 |
} |
89 |
|
90 |
td_vbd_t* |
91 |
tapdisk_vbd_create(uint16_t uuid) |
92 |
{ |
93 |
td_vbd_t *vbd; |
94 |
|
95 |
vbd = calloc(1, sizeof(td_vbd_t)); |
96 |
if (!vbd) {
|
97 |
EPRINTF("failed to allocate tapdisk state\n");
|
98 |
return NULL; |
99 |
} |
100 |
|
101 |
vbd->uuid = uuid; |
102 |
|
103 |
INIT_LIST_HEAD(&vbd->images); |
104 |
INIT_LIST_HEAD(&vbd->new_requests); |
105 |
INIT_LIST_HEAD(&vbd->pending_requests); |
106 |
INIT_LIST_HEAD(&vbd->failed_requests); |
107 |
INIT_LIST_HEAD(&vbd->completed_requests); |
108 |
INIT_LIST_HEAD(&vbd->next); |
109 |
tapdisk_vbd_mark_progress(vbd); |
110 |
|
111 |
return vbd;
|
112 |
} |
113 |
|
114 |
int
|
115 |
tapdisk_vbd_initialize(int rfd, int wfd, uint16_t uuid) |
116 |
{ |
117 |
td_vbd_t *vbd; |
118 |
|
119 |
vbd = tapdisk_server_get_vbd(uuid); |
120 |
if (vbd) {
|
121 |
EPRINTF("duplicate vbds! %u\n", uuid);
|
122 |
return -EEXIST;
|
123 |
} |
124 |
|
125 |
vbd = tapdisk_vbd_create(uuid); |
126 |
|
127 |
tapdisk_server_add_vbd(vbd); |
128 |
|
129 |
return 0; |
130 |
} |
131 |
|
132 |
static int |
133 |
tapdisk_vbd_validate_chain(td_vbd_t *vbd) |
134 |
{ |
135 |
return tapdisk_image_validate_chain(&vbd->images);
|
136 |
} |
137 |
|
138 |
void
|
139 |
tapdisk_vbd_close_vdi(td_vbd_t *vbd) |
140 |
{ |
141 |
tapdisk_image_close_chain(&vbd->images); |
142 |
|
143 |
if (vbd->secondary &&
|
144 |
vbd->secondary_mode != TD_VBD_SECONDARY_MIRROR) { |
145 |
tapdisk_image_close(vbd->secondary); |
146 |
vbd->secondary = NULL;
|
147 |
} |
148 |
|
149 |
if (vbd->retired) {
|
150 |
tapdisk_image_close(vbd->retired); |
151 |
vbd->retired = NULL;
|
152 |
} |
153 |
|
154 |
td_flag_set(vbd->state, TD_VBD_CLOSED); |
155 |
} |
156 |
|
157 |
static int |
158 |
tapdisk_vbd_add_block_cache(td_vbd_t *vbd) |
159 |
{ |
160 |
td_image_t *cache, *image, *target, *tmp; |
161 |
int err;
|
162 |
|
163 |
target = NULL;
|
164 |
|
165 |
tapdisk_vbd_for_each_image(vbd, image, tmp) |
166 |
if (td_flag_test(image->flags, TD_OPEN_RDONLY) &&
|
167 |
td_flag_test(image->flags, TD_OPEN_SHAREABLE)) { |
168 |
target = image; |
169 |
break;
|
170 |
} |
171 |
|
172 |
if (!target)
|
173 |
return 0; |
174 |
|
175 |
cache = tapdisk_image_allocate(target->name, |
176 |
DISK_TYPE_BLOCK_CACHE, |
177 |
target->flags); |
178 |
if (!cache)
|
179 |
return -ENOMEM;
|
180 |
|
181 |
/* try to load existing cache */
|
182 |
err = td_load(cache); |
183 |
if (!err)
|
184 |
goto done;
|
185 |
|
186 |
/* hack driver to send open() correct image size */
|
187 |
if (!target->driver) {
|
188 |
err = -ENODEV; |
189 |
goto fail;
|
190 |
} |
191 |
|
192 |
cache->driver = tapdisk_driver_allocate(cache->type, |
193 |
cache->name, |
194 |
cache->flags); |
195 |
if (!cache->driver) {
|
196 |
err = -ENOMEM; |
197 |
goto fail;
|
198 |
} |
199 |
|
200 |
cache->driver->info = target->driver->info; |
201 |
|
202 |
/* try to open new cache */
|
203 |
err = td_open(cache); |
204 |
if (!err)
|
205 |
goto done;
|
206 |
|
207 |
fail:
|
208 |
/* give up */
|
209 |
tapdisk_image_free(target); |
210 |
return err;
|
211 |
|
212 |
done:
|
213 |
/* insert cache before image */
|
214 |
list_add(&cache->next, target->next.prev); |
215 |
return 0; |
216 |
} |
217 |
|
218 |
static int |
219 |
tapdisk_vbd_add_local_cache(td_vbd_t *vbd) |
220 |
{ |
221 |
td_image_t *cache, *parent; |
222 |
int err;
|
223 |
|
224 |
parent = tapdisk_vbd_first_image(vbd); |
225 |
if (tapdisk_vbd_is_last_image(vbd, parent)) {
|
226 |
DPRINTF("Single-image chain, nothing to cache");
|
227 |
return 0; |
228 |
} |
229 |
|
230 |
cache = tapdisk_image_allocate(parent->name, |
231 |
DISK_TYPE_LCACHE, |
232 |
parent->flags); |
233 |
|
234 |
if (!cache)
|
235 |
return -ENOMEM;
|
236 |
|
237 |
/* try to load existing cache */
|
238 |
err = td_load(cache); |
239 |
if (!err)
|
240 |
goto done;
|
241 |
|
242 |
cache->driver = tapdisk_driver_allocate(cache->type, |
243 |
cache->name, |
244 |
cache->flags); |
245 |
if (!cache->driver) {
|
246 |
err = -ENOMEM; |
247 |
goto fail;
|
248 |
} |
249 |
|
250 |
cache->driver->info = parent->driver->info; |
251 |
|
252 |
/* try to open new cache */
|
253 |
err = td_open(cache); |
254 |
if (!err)
|
255 |
goto done;
|
256 |
|
257 |
fail:
|
258 |
tapdisk_image_free(cache); |
259 |
return err;
|
260 |
|
261 |
done:
|
262 |
/* insert cache right above leaf image */
|
263 |
list_add(&cache->next, &parent->next); |
264 |
|
265 |
DPRINTF("Added local_cache driver\n");
|
266 |
return 0; |
267 |
} |
268 |
|
269 |
int
|
270 |
tapdisk_vbd_add_secondary(td_vbd_t *vbd) |
271 |
{ |
272 |
td_image_t *leaf, *second = NULL;
|
273 |
const char *path; |
274 |
int type, err;
|
275 |
|
276 |
DPRINTF("Adding secondary image: %s\n", vbd->secondary_name);
|
277 |
|
278 |
type = tapdisk_disktype_parse_params(vbd->secondary_name, &path); |
279 |
if (type < 0) |
280 |
return type;
|
281 |
|
282 |
leaf = tapdisk_vbd_first_image(vbd); |
283 |
if (!leaf) {
|
284 |
err = -EINVAL; |
285 |
goto fail;
|
286 |
} |
287 |
|
288 |
err = tapdisk_image_open(type, path, leaf->flags, &second); |
289 |
if (err)
|
290 |
goto fail;
|
291 |
|
292 |
if (second->info.size != leaf->info.size) {
|
293 |
EPRINTF("Secondary image size %"PRIu64" != image size %"PRIu64"\n", |
294 |
second->info.size, leaf->info.size); |
295 |
err = -EINVAL; |
296 |
goto fail;
|
297 |
} |
298 |
|
299 |
vbd->secondary = second; |
300 |
leaf->flags |= TD_IGNORE_ENOSPC; |
301 |
if (td_flag_test(vbd->flags, TD_OPEN_STANDBY)) {
|
302 |
DPRINTF("In standby mode\n");
|
303 |
vbd->secondary_mode = TD_VBD_SECONDARY_STANDBY; |
304 |
} else {
|
305 |
DPRINTF("In mirror mode\n");
|
306 |
vbd->secondary_mode = TD_VBD_SECONDARY_MIRROR; |
307 |
/* we actually need this image to also be part of the chain,
|
308 |
* since it may already contain data */
|
309 |
list_add(&second->next, &leaf->next); |
310 |
} |
311 |
|
312 |
DPRINTF("Added secondary image\n");
|
313 |
return 0; |
314 |
|
315 |
fail:
|
316 |
if (second)
|
317 |
tapdisk_image_close(second); |
318 |
return err;
|
319 |
} |
320 |
|
321 |
/*
 * Flag an ENOSPC condition by creating (or touching) the file
 * BLKTAP2_ENOSPC_SIGNAL_FILE<minor>. Failures are logged and otherwise
 * ignored.
 */
static void signal_enospc(td_vbd_t *vbd)
{
	char *path = NULL;
	int fd;

	if (asprintf(&path, BLKTAP2_ENOSPC_SIGNAL_FILE"%d", vbd->tap->minor) == -1) {
		EPRINTF("Failed to signal ENOSPC condition\n");
		return;
	}

	/* the file's existence is the signal; nothing is written to it */
	fd = open(path, O_WRONLY | O_CREAT | O_NONBLOCK, 0666);
	if (fd != -1)
		close(fd);
	else
		EPRINTF("Failed to open file to signal ENOSPC condition\n");

	free(path);
}
340 |
|
341 |
#if 0
/*
 * Disabled code: open "<last image name>.bat" as a read-only, shareable
 * DISK_TYPE_VINDEX image and add it to the VBD.
 */
static int
tapdisk_vbd_open_index(td_vbd_t *vbd)
{
	td_image_t *tail, *index;
	td_flag_t flags;
	char *path;
	int err;

	tail = tapdisk_vbd_last_image(vbd);

	err = asprintf(&path, "%s.bat", tail->name);
	if (err == -1)
		return -errno;

	err = access(path, R_OK);
	if (err == -1) {
		free(path);
		return -errno;
	}

	flags = vbd->flags | TD_OPEN_RDONLY | TD_OPEN_SHAREABLE;
	index = tapdisk_image_allocate(path, DISK_TYPE_VINDEX, flags);
	if (!index) {
		err = -ENOMEM;
		goto fail;
	}

	err = td_open(index);
	if (err)
		goto fail;

	tapdisk_vbd_add_image(vbd, index);
	return 0;

fail:
	if (index)
		tapdisk_image_free(index);
	free(path);
	return err;
}
#endif
|
382 |
|
383 |
static int |
384 |
tapdisk_vbd_add_dirty_log(td_vbd_t *vbd) |
385 |
{ |
386 |
int err;
|
387 |
td_driver_t *driver; |
388 |
td_image_t *log, *parent; |
389 |
|
390 |
driver = NULL;
|
391 |
log = NULL;
|
392 |
|
393 |
parent = tapdisk_vbd_first_image(vbd); |
394 |
|
395 |
log = tapdisk_image_allocate(parent->name, |
396 |
DISK_TYPE_LOG, |
397 |
parent->flags); |
398 |
if (!log)
|
399 |
return -ENOMEM;
|
400 |
|
401 |
driver = tapdisk_driver_allocate(log->type, |
402 |
log->name, |
403 |
log->flags); |
404 |
if (!driver) {
|
405 |
err = -ENOMEM; |
406 |
goto fail;
|
407 |
} |
408 |
|
409 |
driver->info = parent->driver->info; |
410 |
log->driver = driver; |
411 |
|
412 |
err = td_open(log); |
413 |
if (err)
|
414 |
goto fail;
|
415 |
|
416 |
tapdisk_vbd_add_image(vbd, log); |
417 |
return 0; |
418 |
|
419 |
fail:
|
420 |
tapdisk_image_free(log); |
421 |
return err;
|
422 |
} |
423 |
|
424 |
int
|
425 |
tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *name, td_flag_t flags, int prt_devnum) |
426 |
{ |
427 |
char *tmp = vbd->name;
|
428 |
int err;
|
429 |
|
430 |
if (!list_empty(&vbd->images)) {
|
431 |
err = -EBUSY; |
432 |
goto fail;
|
433 |
} |
434 |
|
435 |
if (!name && !vbd->name) {
|
436 |
err = -EINVAL; |
437 |
goto fail;
|
438 |
} |
439 |
|
440 |
if (name) {
|
441 |
vbd->name = strdup(name); |
442 |
if (!vbd->name) {
|
443 |
err = -errno; |
444 |
goto fail;
|
445 |
} |
446 |
} |
447 |
|
448 |
err = tapdisk_image_open_chain(vbd->name, flags, prt_devnum, &vbd->images); |
449 |
if (err)
|
450 |
goto fail;
|
451 |
|
452 |
td_flag_clear(vbd->state, TD_VBD_CLOSED); |
453 |
vbd->flags = flags; |
454 |
|
455 |
if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) {
|
456 |
err = tapdisk_vbd_add_dirty_log(vbd); |
457 |
if (err)
|
458 |
goto fail;
|
459 |
} |
460 |
|
461 |
if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) {
|
462 |
err = tapdisk_vbd_add_block_cache(vbd); |
463 |
if (err)
|
464 |
goto fail;
|
465 |
} |
466 |
|
467 |
if (td_flag_test(vbd->flags, TD_OPEN_LOCAL_CACHE)) {
|
468 |
err = tapdisk_vbd_add_local_cache(vbd); |
469 |
if (err)
|
470 |
goto fail;
|
471 |
} |
472 |
|
473 |
err = tapdisk_vbd_validate_chain(vbd); |
474 |
if (err)
|
475 |
goto fail;
|
476 |
|
477 |
if (td_flag_test(vbd->flags, TD_OPEN_SECONDARY)) {
|
478 |
err = tapdisk_vbd_add_secondary(vbd); |
479 |
if (err)
|
480 |
goto fail;
|
481 |
} |
482 |
|
483 |
if (tmp != vbd->name)
|
484 |
free(tmp); |
485 |
|
486 |
return err;
|
487 |
|
488 |
fail:
|
489 |
if (vbd->name != tmp) {
|
490 |
free(vbd->name); |
491 |
vbd->name = tmp; |
492 |
} |
493 |
|
494 |
if (!list_empty(&vbd->images))
|
495 |
tapdisk_image_close_chain(&vbd->images); |
496 |
|
497 |
vbd->flags = 0;
|
498 |
|
499 |
return err;
|
500 |
} |
501 |
|
502 |
void
|
503 |
tapdisk_vbd_detach(td_vbd_t *vbd) |
504 |
{ |
505 |
td_blktap_t *tap = vbd->tap; |
506 |
|
507 |
if (tap) {
|
508 |
tapdisk_blktap_close(tap); |
509 |
vbd->tap = NULL;
|
510 |
} |
511 |
} |
512 |
|
513 |
int
|
514 |
tapdisk_vbd_attach(td_vbd_t *vbd, const char *devname, int minor) |
515 |
{ |
516 |
|
517 |
if (vbd->tap)
|
518 |
return -EALREADY;
|
519 |
|
520 |
return tapdisk_blktap_open(devname, vbd, &vbd->tap);
|
521 |
} |
522 |
|
523 |
int
|
524 |
tapdisk_vbd_open(td_vbd_t *vbd, const char *name, |
525 |
int minor, const char *ring, td_flag_t flags) |
526 |
{ |
527 |
int err;
|
528 |
|
529 |
err = tapdisk_vbd_open_vdi(vbd, name, flags, -1);
|
530 |
if (err)
|
531 |
goto out;
|
532 |
|
533 |
err = tapdisk_vbd_attach(vbd, ring, minor); |
534 |
if (err)
|
535 |
goto out;
|
536 |
|
537 |
return 0; |
538 |
|
539 |
out:
|
540 |
tapdisk_vbd_detach(vbd); |
541 |
tapdisk_vbd_close_vdi(vbd); |
542 |
free(vbd->name); |
543 |
vbd->name = NULL;
|
544 |
return err;
|
545 |
} |
546 |
|
547 |
static void |
548 |
tapdisk_vbd_queue_count(td_vbd_t *vbd, int *new,
|
549 |
int *pending, int *failed, int *completed) |
550 |
{ |
551 |
int n, p, f, c;
|
552 |
td_vbd_request_t *vreq, *tvreq; |
553 |
|
554 |
n = 0;
|
555 |
p = 0;
|
556 |
f = 0;
|
557 |
c = 0;
|
558 |
|
559 |
tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->new_requests) |
560 |
n++; |
561 |
|
562 |
tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->pending_requests) |
563 |
p++; |
564 |
|
565 |
tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->failed_requests) |
566 |
f++; |
567 |
|
568 |
tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->completed_requests) |
569 |
c++; |
570 |
|
571 |
*new = n; |
572 |
*pending = p; |
573 |
*failed = f; |
574 |
*completed = c; |
575 |
} |
576 |
|
577 |
static int |
578 |
tapdisk_vbd_shutdown(td_vbd_t *vbd) |
579 |
{ |
580 |
int new, pending, failed, completed;
|
581 |
|
582 |
if (!list_empty(&vbd->pending_requests))
|
583 |
return -EAGAIN;
|
584 |
|
585 |
tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed); |
586 |
|
587 |
DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
|
588 |
"failed: 0x%02x, completed: 0x%02x\n",
|
589 |
vbd->name, vbd->state, new, pending, failed, completed); |
590 |
DPRINTF("last activity: %010ld.%06ld, errors: 0x%04"PRIx64", " |
591 |
"retries: 0x%04"PRIx64", received: 0x%08"PRIx64", " |
592 |
"returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n", |
593 |
vbd->ts.tv_sec, vbd->ts.tv_usec, |
594 |
vbd->errors, vbd->retries, vbd->received, vbd->returned, |
595 |
vbd->kicked); |
596 |
|
597 |
tapdisk_vbd_close_vdi(vbd); |
598 |
tapdisk_vbd_detach(vbd); |
599 |
tapdisk_server_remove_vbd(vbd); |
600 |
free(vbd->name); |
601 |
free(vbd); |
602 |
|
603 |
return 0; |
604 |
} |
605 |
|
606 |
int
|
607 |
tapdisk_vbd_close(td_vbd_t *vbd) |
608 |
{ |
609 |
/*
|
610 |
* don't close if any requests are pending in the aio layer
|
611 |
*/
|
612 |
if (!list_empty(&vbd->pending_requests))
|
613 |
goto fail;
|
614 |
|
615 |
/*
|
616 |
* if the queue is still active and we have more
|
617 |
* requests, try to complete them before closing.
|
618 |
*/
|
619 |
if (tapdisk_vbd_queue_ready(vbd) &&
|
620 |
(!list_empty(&vbd->new_requests) || |
621 |
!list_empty(&vbd->failed_requests) || |
622 |
!list_empty(&vbd->completed_requests))) |
623 |
goto fail;
|
624 |
|
625 |
return tapdisk_vbd_shutdown(vbd);
|
626 |
|
627 |
fail:
|
628 |
td_flag_set(vbd->state, TD_VBD_SHUTDOWN_REQUESTED); |
629 |
DBG(TLOG_WARN, "%s: requests pending\n", vbd->name);
|
630 |
return -EAGAIN;
|
631 |
} |
632 |
|
633 |
/*
|
634 |
* control operations
|
635 |
*/
|
636 |
|
637 |
void
|
638 |
tapdisk_vbd_debug(td_vbd_t *vbd) |
639 |
{ |
640 |
td_image_t *image, *tmp; |
641 |
int new, pending, failed, completed;
|
642 |
|
643 |
tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed); |
644 |
|
645 |
DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
|
646 |
"failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06ld, "
|
647 |
"errors: 0x%04"PRIx64", retries: 0x%04"PRIx64", " |
648 |
"received: 0x%08"PRIx64", returned: 0x%08"PRIx64", " |
649 |
"kicked: 0x%08"PRIx64"\n", |
650 |
vbd->name, vbd->state, new, pending, failed, completed, |
651 |
vbd->ts.tv_sec, vbd->ts.tv_usec, vbd->errors, vbd->retries, |
652 |
vbd->received, vbd->returned, vbd->kicked); |
653 |
|
654 |
tapdisk_vbd_for_each_image(vbd, image, tmp) |
655 |
td_debug(image); |
656 |
} |
657 |
|
658 |
static void |
659 |
tapdisk_vbd_drop_log(td_vbd_t *vbd) |
660 |
{ |
661 |
if (td_flag_test(vbd->state, TD_VBD_LOG_DROPPED))
|
662 |
return;
|
663 |
|
664 |
tapdisk_vbd_debug(vbd); |
665 |
tlog_precious(); |
666 |
td_flag_set(vbd->state, TD_VBD_LOG_DROPPED); |
667 |
} |
668 |
|
669 |
int
|
670 |
tapdisk_vbd_get_disk_info(td_vbd_t *vbd, td_disk_info_t *info) |
671 |
{ |
672 |
if (list_empty(&vbd->images))
|
673 |
return -EINVAL;
|
674 |
|
675 |
*info = tapdisk_vbd_first_image(vbd)->info; |
676 |
return 0; |
677 |
} |
678 |
|
679 |
static int |
680 |
tapdisk_vbd_queue_ready(td_vbd_t *vbd) |
681 |
{ |
682 |
return (!td_flag_test(vbd->state, TD_VBD_DEAD) &&
|
683 |
!td_flag_test(vbd->state, TD_VBD_CLOSED) && |
684 |
!td_flag_test(vbd->state, TD_VBD_QUIESCED) && |
685 |
!td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)); |
686 |
} |
687 |
|
688 |
int
|
689 |
tapdisk_vbd_retry_needed(td_vbd_t *vbd) |
690 |
{ |
691 |
return !(list_empty(&vbd->failed_requests) &&
|
692 |
list_empty(&vbd->new_requests)); |
693 |
} |
694 |
|
695 |
int
|
696 |
tapdisk_vbd_lock(td_vbd_t *vbd) |
697 |
{ |
698 |
return 0; |
699 |
} |
700 |
|
701 |
int
|
702 |
tapdisk_vbd_quiesce_queue(td_vbd_t *vbd) |
703 |
{ |
704 |
if (!list_empty(&vbd->pending_requests)) {
|
705 |
td_flag_set(vbd->state, TD_VBD_QUIESCE_REQUESTED); |
706 |
return -EAGAIN;
|
707 |
} |
708 |
|
709 |
td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED); |
710 |
td_flag_set(vbd->state, TD_VBD_QUIESCED); |
711 |
return 0; |
712 |
} |
713 |
|
714 |
int
|
715 |
tapdisk_vbd_start_queue(td_vbd_t *vbd) |
716 |
{ |
717 |
td_flag_clear(vbd->state, TD_VBD_QUIESCED); |
718 |
td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED); |
719 |
tapdisk_vbd_mark_progress(vbd); |
720 |
return 0; |
721 |
} |
722 |
|
723 |
int
|
724 |
tapdisk_vbd_kill_queue(td_vbd_t *vbd) |
725 |
{ |
726 |
tapdisk_vbd_quiesce_queue(vbd); |
727 |
td_flag_set(vbd->state, TD_VBD_DEAD); |
728 |
return 0; |
729 |
} |
730 |
|
731 |
#if 0
/*
 * Disabled code: open @image and, unless it is the last image of the
 * chain, validate it against the next image, closing it again if that
 * validation fails.
 */
static int
tapdisk_vbd_open_image(td_vbd_t *vbd, td_image_t *image)
{
	td_image_t *parent;
	int err;

	err = td_open(image);
	if (err)
		return err;

	if (tapdisk_vbd_is_last_image(vbd, image))
		return 0;

	parent = tapdisk_vbd_next_image(image);
	err = td_validate_parent(image, parent);
	if (err) {
		td_close(image);
		return err;
	}

	return 0;
}
#endif
|
754 |
|
755 |
int
|
756 |
tapdisk_vbd_pause(td_vbd_t *vbd) |
757 |
{ |
758 |
int err;
|
759 |
|
760 |
DBG(TLOG_DBG, "pause requested\n");
|
761 |
|
762 |
td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED); |
763 |
|
764 |
err = tapdisk_vbd_quiesce_queue(vbd); |
765 |
if (err)
|
766 |
return err;
|
767 |
|
768 |
tapdisk_vbd_close_vdi(vbd); |
769 |
|
770 |
DBG(TLOG_DBG, "pause completed\n");
|
771 |
|
772 |
td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED); |
773 |
td_flag_set(vbd->state, TD_VBD_PAUSED); |
774 |
|
775 |
return 0; |
776 |
} |
777 |
|
778 |
int
|
779 |
tapdisk_vbd_resume(td_vbd_t *vbd, const char *name) |
780 |
{ |
781 |
int i, err;
|
782 |
|
783 |
DBG(TLOG_DBG, "resume requested\n");
|
784 |
|
785 |
if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
|
786 |
EPRINTF("resume request for unpaused vbd %s\n", vbd->name);
|
787 |
return -EINVAL;
|
788 |
} |
789 |
|
790 |
for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { |
791 |
err = tapdisk_vbd_open_vdi(vbd, name, vbd->flags | TD_OPEN_STRICT, -1);
|
792 |
if (!err)
|
793 |
break;
|
794 |
|
795 |
sleep(TD_VBD_EIO_SLEEP); |
796 |
} |
797 |
|
798 |
if (err)
|
799 |
return err;
|
800 |
|
801 |
DBG(TLOG_DBG, "resume completed\n");
|
802 |
|
803 |
tapdisk_vbd_start_queue(vbd); |
804 |
td_flag_clear(vbd->state, TD_VBD_PAUSED); |
805 |
td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED); |
806 |
tapdisk_vbd_check_state(vbd); |
807 |
|
808 |
DBG(TLOG_DBG, "state checked\n");
|
809 |
|
810 |
return 0; |
811 |
} |
812 |
|
813 |
static int |
814 |
tapdisk_vbd_request_ttl(td_vbd_request_t *vreq, |
815 |
const struct timeval *now) |
816 |
{ |
817 |
struct timeval delta;
|
818 |
timersub(now, &vreq->ts, &delta); |
819 |
return TD_VBD_REQUEST_TIMEOUT - delta.tv_sec;
|
820 |
} |
821 |
|
822 |
static int |
823 |
__tapdisk_vbd_request_timeout(td_vbd_request_t *vreq, |
824 |
const struct timeval *now) |
825 |
{ |
826 |
int timeout;
|
827 |
|
828 |
timeout = tapdisk_vbd_request_ttl(vreq, now) < 0;
|
829 |
if (timeout)
|
830 |
ERR(vreq->error, |
831 |
"req %s timed out, retried %d times\n",
|
832 |
vreq->name, vreq->num_retries); |
833 |
|
834 |
return timeout;
|
835 |
} |
836 |
|
837 |
static int |
838 |
tapdisk_vbd_request_timeout(td_vbd_request_t *vreq) |
839 |
{ |
840 |
struct timeval now;
|
841 |
gettimeofday(&now, NULL);
|
842 |
return __tapdisk_vbd_request_timeout(vreq, &now);
|
843 |
} |
844 |
|
845 |
static void |
846 |
tapdisk_vbd_check_queue_state(td_vbd_t *vbd) |
847 |
{ |
848 |
td_vbd_request_t *vreq, *tmp; |
849 |
struct timeval now;
|
850 |
|
851 |
gettimeofday(&now, NULL);
|
852 |
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) |
853 |
if (__tapdisk_vbd_request_timeout(vreq, &now))
|
854 |
tapdisk_vbd_complete_vbd_request(vbd, vreq); |
855 |
|
856 |
if (!list_empty(&vbd->new_requests) ||
|
857 |
!list_empty(&vbd->failed_requests)) |
858 |
tapdisk_vbd_issue_requests(vbd); |
859 |
|
860 |
} |
861 |
|
862 |
void
|
863 |
tapdisk_vbd_check_state(td_vbd_t *vbd) |
864 |
{ |
865 |
tapdisk_vbd_check_queue_state(vbd); |
866 |
|
867 |
if (td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED))
|
868 |
tapdisk_vbd_quiesce_queue(vbd); |
869 |
|
870 |
if (td_flag_test(vbd->state, TD_VBD_PAUSE_REQUESTED))
|
871 |
tapdisk_vbd_pause(vbd); |
872 |
|
873 |
if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
|
874 |
tapdisk_vbd_close(vbd); |
875 |
} |
876 |
|
877 |
void
|
878 |
tapdisk_vbd_check_progress(td_vbd_t *vbd) |
879 |
{ |
880 |
time_t diff; |
881 |
struct timeval now, delta;
|
882 |
|
883 |
if (list_empty(&vbd->pending_requests))
|
884 |
return;
|
885 |
|
886 |
gettimeofday(&now, NULL);
|
887 |
timersub(&now, &vbd->ts, &delta); |
888 |
diff = delta.tv_sec; |
889 |
|
890 |
if (diff >= TD_VBD_WATCHDOG_TIMEOUT && tapdisk_vbd_queue_ready(vbd)) {
|
891 |
DBG(TLOG_WARN, "%s: watchdog timeout: pending requests "
|
892 |
"idle for %ld seconds\n", vbd->name, diff);
|
893 |
tapdisk_vbd_drop_log(vbd); |
894 |
return;
|
895 |
} |
896 |
|
897 |
tapdisk_server_set_max_timeout(TD_VBD_WATCHDOG_TIMEOUT - diff); |
898 |
} |
899 |
|
900 |
/*
|
901 |
* request submission
|
902 |
*/
|
903 |
|
904 |
static int |
905 |
tapdisk_vbd_check_queue(td_vbd_t *vbd) |
906 |
{ |
907 |
if (list_empty(&vbd->images))
|
908 |
return -ENOSYS;
|
909 |
|
910 |
if (!tapdisk_vbd_queue_ready(vbd))
|
911 |
return -EAGAIN;
|
912 |
|
913 |
return 0; |
914 |
} |
915 |
|
916 |
static int |
917 |
tapdisk_vbd_request_should_retry(td_vbd_t *vbd, td_vbd_request_t *vreq) |
918 |
{ |
919 |
if (td_flag_test(vbd->state, TD_VBD_DEAD) ||
|
920 |
td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) |
921 |
return 0; |
922 |
|
923 |
switch (abs(vreq->error)) {
|
924 |
case EPERM:
|
925 |
case ENOSYS:
|
926 |
case ESTALE:
|
927 |
case ENOSPC:
|
928 |
return 0; |
929 |
} |
930 |
|
931 |
if (tapdisk_vbd_request_timeout(vreq))
|
932 |
return 0; |
933 |
|
934 |
return 1; |
935 |
} |
936 |
|
937 |
static void |
938 |
tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq) |
939 |
{ |
940 |
if (!vreq->submitting && !vreq->secs_pending) {
|
941 |
if (vreq->error &&
|
942 |
tapdisk_vbd_request_should_retry(vbd, vreq)) |
943 |
tapdisk_vbd_move_request(vreq, &vbd->failed_requests); |
944 |
else
|
945 |
tapdisk_vbd_move_request(vreq, &vbd->completed_requests); |
946 |
} |
947 |
} |
948 |
|
949 |
static void |
950 |
FIXME_maybe_count_enospc_redirect(td_vbd_t *vbd, td_request_t treq) |
951 |
{ |
952 |
int write = treq.op == TD_OP_WRITE;
|
953 |
if (write &&
|
954 |
treq.image == tapdisk_vbd_first_image(vbd) && |
955 |
vbd->FIXME_enospc_redirect_count_enabled) |
956 |
vbd->FIXME_enospc_redirect_count += treq.secs; |
957 |
} |
958 |
|
959 |
static void |
960 |
__tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq, |
961 |
td_request_t treq, int res)
|
962 |
{ |
963 |
td_image_t *image = treq.image; |
964 |
int err;
|
965 |
|
966 |
err = (res <= 0 ? res : -res);
|
967 |
vbd->secs_pending -= treq.secs; |
968 |
vreq->secs_pending -= treq.secs; |
969 |
|
970 |
if (err != -EBUSY) {
|
971 |
int write = treq.op == TD_OP_WRITE;
|
972 |
td_sector_count_add(&image->stats.hits, treq.secs, write); |
973 |
if (err)
|
974 |
td_sector_count_add(&image->stats.fail, |
975 |
treq.secs, write); |
976 |
|
977 |
FIXME_maybe_count_enospc_redirect(vbd, treq); |
978 |
} |
979 |
|
980 |
if (err) {
|
981 |
if (err != -EBUSY) {
|
982 |
if (!vreq->error &&
|
983 |
err != vreq->prev_error) |
984 |
tlog_drv_error(image->driver, err, |
985 |
"req %s: %s 0x%04x secs @ 0x%08"PRIx64,
|
986 |
vreq->name, |
987 |
(treq.op == TD_OP_WRITE ? "write" : "read"), |
988 |
treq.secs, treq.sec); |
989 |
vbd->errors++; |
990 |
} |
991 |
vreq->error = (vreq->error ? : err); |
992 |
} |
993 |
|
994 |
tapdisk_vbd_complete_vbd_request(vbd, vreq); |
995 |
} |
996 |
|
997 |
static void |
998 |
__tapdisk_vbd_reissue_td_request(td_vbd_t *vbd, |
999 |
td_image_t *image, td_request_t treq) |
1000 |
{ |
1001 |
td_image_t *parent; |
1002 |
td_vbd_request_t *vreq; |
1003 |
|
1004 |
vreq = treq.vreq; |
1005 |
gettimeofday(&vreq->last_try, NULL);
|
1006 |
|
1007 |
vreq->submitting++; |
1008 |
|
1009 |
if (tapdisk_vbd_is_last_image(vbd, image)) {
|
1010 |
memset(treq.buf, 0, treq.secs << SECTOR_SHIFT);
|
1011 |
td_complete_request(treq, 0);
|
1012 |
goto done;
|
1013 |
} |
1014 |
|
1015 |
parent = tapdisk_vbd_next_image(image); |
1016 |
treq.image = parent; |
1017 |
|
1018 |
/* return zeros for requests that extend beyond end of parent image */
|
1019 |
if (treq.sec + treq.secs > parent->info.size) {
|
1020 |
td_request_t clone = treq; |
1021 |
|
1022 |
if (parent->info.size > treq.sec) {
|
1023 |
int secs = parent->info.size - treq.sec;
|
1024 |
clone.sec += secs; |
1025 |
clone.secs -= secs; |
1026 |
clone.buf += (secs << SECTOR_SHIFT); |
1027 |
treq.secs = secs; |
1028 |
} else
|
1029 |
treq.secs = 0;
|
1030 |
|
1031 |
memset(clone.buf, 0, clone.secs << SECTOR_SHIFT);
|
1032 |
td_complete_request(clone, 0);
|
1033 |
|
1034 |
if (!treq.secs)
|
1035 |
goto done;
|
1036 |
} |
1037 |
|
1038 |
switch (treq.op) {
|
1039 |
case TD_OP_WRITE:
|
1040 |
td_queue_write(parent, treq); |
1041 |
break;
|
1042 |
|
1043 |
case TD_OP_READ:
|
1044 |
td_queue_read(parent, treq); |
1045 |
break;
|
1046 |
} |
1047 |
|
1048 |
done:
|
1049 |
vreq->submitting--; |
1050 |
if (!vreq->secs_pending)
|
1051 |
tapdisk_vbd_complete_vbd_request(vbd, vreq); |
1052 |
} |
1053 |
|
1054 |
void
|
1055 |
tapdisk_vbd_forward_request(td_request_t treq) |
1056 |
{ |
1057 |
td_vbd_t *vbd; |
1058 |
td_image_t *image; |
1059 |
td_vbd_request_t *vreq; |
1060 |
|
1061 |
image = treq.image; |
1062 |
vreq = treq.vreq; |
1063 |
vbd = vreq->vbd; |
1064 |
|
1065 |
tapdisk_vbd_mark_progress(vbd); |
1066 |
|
1067 |
if (tapdisk_vbd_queue_ready(vbd))
|
1068 |
__tapdisk_vbd_reissue_td_request(vbd, image, treq); |
1069 |
else
|
1070 |
__tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EBUSY); |
1071 |
} |
1072 |
|
1073 |
void
|
1074 |
tapdisk_vbd_complete_td_request(td_request_t treq, int res)
|
1075 |
{ |
1076 |
td_vbd_t *vbd; |
1077 |
td_image_t *image, *leaf; |
1078 |
td_vbd_request_t *vreq; |
1079 |
|
1080 |
image = treq.image; |
1081 |
vreq = treq.vreq; |
1082 |
vbd = vreq->vbd; |
1083 |
|
1084 |
tapdisk_vbd_mark_progress(vbd); |
1085 |
|
1086 |
if (abs(res) == ENOSPC && td_flag_test(image->flags,
|
1087 |
TD_IGNORE_ENOSPC)) { |
1088 |
res = 0;
|
1089 |
leaf = tapdisk_vbd_first_image(vbd); |
1090 |
if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR) {
|
1091 |
DPRINTF("ENOSPC: disabling mirroring\n");
|
1092 |
list_del_init(&leaf->next); |
1093 |
vbd->retired = leaf; |
1094 |
} else if (vbd->secondary_mode == TD_VBD_SECONDARY_STANDBY) { |
1095 |
DPRINTF("ENOSPC: failing over to secondary image\n");
|
1096 |
list_add(&vbd->secondary->next, leaf->next.prev); |
1097 |
vbd->FIXME_enospc_redirect_count_enabled = 1;
|
1098 |
} |
1099 |
if (vbd->secondary_mode != TD_VBD_SECONDARY_DISABLED) {
|
1100 |
vbd->secondary = NULL;
|
1101 |
vbd->secondary_mode = TD_VBD_SECONDARY_DISABLED; |
1102 |
signal_enospc(vbd); |
1103 |
} |
1104 |
} |
1105 |
|
1106 |
DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08"PRIx64
|
1107 |
" secs 0x%04x buf %p op %d res %d\n", image->name,
|
1108 |
vreq->name, treq.sidx, treq.sec, treq.secs, |
1109 |
treq.buf, vreq->op, res); |
1110 |
|
1111 |
__tapdisk_vbd_complete_td_request(vbd, vreq, treq, res); |
1112 |
} |
1113 |
|
1114 |
static inline void |
1115 |
queue_mirror_req(td_vbd_t *vbd, td_request_t clone) |
1116 |
{ |
1117 |
clone.image = vbd->secondary; |
1118 |
td_queue_write(vbd->secondary, clone); |
1119 |
} |
1120 |
|
1121 |
static int |
1122 |
tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) |
1123 |
{ |
1124 |
td_image_t *image; |
1125 |
td_request_t treq; |
1126 |
td_sector_t sec; |
1127 |
int i, err;
|
1128 |
|
1129 |
sec = vreq->sec; |
1130 |
image = tapdisk_vbd_first_image(vbd); |
1131 |
|
1132 |
vreq->submitting = 1;
|
1133 |
|
1134 |
tapdisk_vbd_mark_progress(vbd); |
1135 |
vreq->last_try = vbd->ts; |
1136 |
|
1137 |
tapdisk_vbd_move_request(vreq, &vbd->pending_requests); |
1138 |
|
1139 |
err = tapdisk_vbd_check_queue(vbd); |
1140 |
if (err) {
|
1141 |
vreq->error = err; |
1142 |
goto fail;
|
1143 |
} |
1144 |
|
1145 |
err = tapdisk_image_check_request(image, vreq); |
1146 |
if (err) {
|
1147 |
vreq->error = err; |
1148 |
goto fail;
|
1149 |
} |
1150 |
|
1151 |
for (i = 0; i < vreq->iovcnt; i++) { |
1152 |
struct td_iovec *iov = &vreq->iov[i];
|
1153 |
|
1154 |
treq.sidx = i; |
1155 |
treq.buf = iov->base; |
1156 |
treq.sec = sec; |
1157 |
treq.secs = iov->secs; |
1158 |
treq.image = image; |
1159 |
treq.cb = tapdisk_vbd_complete_td_request; |
1160 |
treq.cb_data = NULL;
|
1161 |
treq.vreq = vreq; |
1162 |
|
1163 |
|
1164 |
vreq->secs_pending += iov->secs; |
1165 |
vbd->secs_pending += iov->secs; |
1166 |
if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR &&
|
1167 |
vreq->op == TD_OP_WRITE) { |
1168 |
vreq->secs_pending += iov->secs; |
1169 |
vbd->secs_pending += iov->secs; |
1170 |
} |
1171 |
|
1172 |
switch (vreq->op) {
|
1173 |
case TD_OP_WRITE:
|
1174 |
treq.op = TD_OP_WRITE; |
1175 |
/* it's important to queue the mirror request before queuing
|
1176 |
* the main one. If the main image runs into ENOSPC, the
|
1177 |
* mirroring could be disabled before td_queue_write returns,
|
1178 |
* so if the mirror request was queued after (which would then
|
1179 |
* not happen), we'd lose that write and cause the process to
|
1180 |
* hang with unacknowledged writes */
|
1181 |
if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR)
|
1182 |
queue_mirror_req(vbd, treq); |
1183 |
td_queue_write(treq.image, treq); |
1184 |
break;
|
1185 |
|
1186 |
case TD_OP_READ:
|
1187 |
treq.op = TD_OP_READ; |
1188 |
td_queue_read(treq.image, treq); |
1189 |
break;
|
1190 |
} |
1191 |
|
1192 |
DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08"PRIx64" secs 0x%04x " |
1193 |
"buf %p op %d\n", image->name, vreq->name, i, treq.sec, treq.secs,
|
1194 |
treq.buf, vreq->op); |
1195 |
sec += iov->secs; |
1196 |
} |
1197 |
|
1198 |
err = 0;
|
1199 |
|
1200 |
out:
|
1201 |
vreq->submitting--; |
1202 |
if (!vreq->secs_pending) {
|
1203 |
err = (err ? : vreq->error); |
1204 |
tapdisk_vbd_complete_vbd_request(vbd, vreq); |
1205 |
} |
1206 |
|
1207 |
return err;
|
1208 |
|
1209 |
fail:
|
1210 |
vreq->error = err; |
1211 |
goto out;
|
1212 |
} |
1213 |
|
1214 |
static int |
1215 |
tapdisk_vbd_request_completed(td_vbd_t *vbd, td_vbd_request_t *vreq) |
1216 |
{ |
1217 |
return vreq->list_head == &vbd->completed_requests;
|
1218 |
} |
1219 |
|
1220 |
static int |
1221 |
tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd) |
1222 |
{ |
1223 |
int err;
|
1224 |
struct timeval now;
|
1225 |
td_vbd_request_t *vreq, *tmp; |
1226 |
|
1227 |
err = 0;
|
1228 |
gettimeofday(&now, NULL);
|
1229 |
|
1230 |
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) { |
1231 |
if (vreq->secs_pending)
|
1232 |
continue;
|
1233 |
|
1234 |
if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) {
|
1235 |
tapdisk_vbd_complete_vbd_request(vbd, vreq); |
1236 |
continue;
|
1237 |
} |
1238 |
|
1239 |
if (vreq->error != -EBUSY &&
|
1240 |
now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL) |
1241 |
continue;
|
1242 |
|
1243 |
vbd->retries++; |
1244 |
vreq->num_retries++; |
1245 |
|
1246 |
vreq->prev_error = vreq->error; |
1247 |
vreq->error = 0;
|
1248 |
|
1249 |
DBG(TLOG_DBG, "retry #%d of req %s, "
|
1250 |
"sec 0x%08"PRIx64", iovcnt: %d\n", vreq->num_retries, |
1251 |
vreq->name, vreq->sec, vreq->iovcnt); |
1252 |
|
1253 |
err = tapdisk_vbd_issue_request(vbd, vreq); |
1254 |
/*
|
1255 |
* if this request failed, but was not completed,
|
1256 |
* we'll back off for a while.
|
1257 |
*/
|
1258 |
if (err && !tapdisk_vbd_request_completed(vbd, vreq))
|
1259 |
break;
|
1260 |
} |
1261 |
|
1262 |
return 0; |
1263 |
} |
1264 |
|
1265 |
static void |
1266 |
tapdisk_vbd_count_new_request(td_vbd_t *vbd, td_vbd_request_t *vreq) |
1267 |
{ |
1268 |
struct td_iovec *iov;
|
1269 |
int write;
|
1270 |
|
1271 |
write = vreq->op == TD_OP_WRITE; |
1272 |
|
1273 |
for (iov = &vreq->iov[0]; iov < &vreq->iov[vreq->iovcnt]; iov++) |
1274 |
td_sector_count_add(&vbd->secs, iov->secs, write); |
1275 |
} |
1276 |
|
1277 |
static int |
1278 |
tapdisk_vbd_issue_new_requests(td_vbd_t *vbd) |
1279 |
{ |
1280 |
int err;
|
1281 |
td_vbd_request_t *vreq, *tmp; |
1282 |
|
1283 |
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) { |
1284 |
err = tapdisk_vbd_issue_request(vbd, vreq); |
1285 |
/*
|
1286 |
* if this request failed, but was not completed,
|
1287 |
* we'll back off for a while.
|
1288 |
*/
|
1289 |
if (err && !tapdisk_vbd_request_completed(vbd, vreq))
|
1290 |
return err;
|
1291 |
|
1292 |
tapdisk_vbd_count_new_request(vbd, vreq); |
1293 |
} |
1294 |
|
1295 |
return 0; |
1296 |
} |
1297 |
|
1298 |
int
|
1299 |
tapdisk_vbd_recheck_state(td_vbd_t *vbd) |
1300 |
{ |
1301 |
if (list_empty(&vbd->new_requests))
|
1302 |
return 0; |
1303 |
|
1304 |
if (td_flag_test(vbd->state, TD_VBD_QUIESCED) ||
|
1305 |
td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)) |
1306 |
return 0; |
1307 |
|
1308 |
tapdisk_vbd_issue_new_requests(vbd); |
1309 |
|
1310 |
return 1; |
1311 |
} |
1312 |
|
1313 |
static int |
1314 |
tapdisk_vbd_kill_requests(td_vbd_t *vbd) |
1315 |
{ |
1316 |
td_vbd_request_t *vreq, *tmp; |
1317 |
|
1318 |
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) { |
1319 |
vreq->error = -ESHUTDOWN; |
1320 |
tapdisk_vbd_move_request(vreq, &vbd->completed_requests); |
1321 |
} |
1322 |
|
1323 |
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) { |
1324 |
vreq->error = -ESHUTDOWN; |
1325 |
tapdisk_vbd_move_request(vreq, &vbd->completed_requests); |
1326 |
} |
1327 |
|
1328 |
return 0; |
1329 |
} |
1330 |
|
1331 |
int
|
1332 |
tapdisk_vbd_issue_requests(td_vbd_t *vbd) |
1333 |
{ |
1334 |
int err;
|
1335 |
|
1336 |
if (td_flag_test(vbd->state, TD_VBD_DEAD))
|
1337 |
return tapdisk_vbd_kill_requests(vbd);
|
1338 |
|
1339 |
if (td_flag_test(vbd->state, TD_VBD_QUIESCED) ||
|
1340 |
td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)) |
1341 |
return -EAGAIN;
|
1342 |
|
1343 |
err = tapdisk_vbd_reissue_failed_requests(vbd); |
1344 |
if (err)
|
1345 |
return err;
|
1346 |
|
1347 |
return tapdisk_vbd_issue_new_requests(vbd);
|
1348 |
} |
1349 |
|
1350 |
int
|
1351 |
tapdisk_vbd_queue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) |
1352 |
{ |
1353 |
gettimeofday(&vreq->ts, NULL);
|
1354 |
vreq->vbd = vbd; |
1355 |
|
1356 |
list_add_tail(&vreq->next, &vbd->new_requests); |
1357 |
vbd->received++; |
1358 |
|
1359 |
return 0; |
1360 |
} |
1361 |
|
1362 |
void
|
1363 |
tapdisk_vbd_kick(td_vbd_t *vbd) |
1364 |
{ |
1365 |
const struct list_head *list = &vbd->completed_requests; |
1366 |
td_vbd_request_t *vreq, *prev, *next; |
1367 |
|
1368 |
vbd->kicked++; |
1369 |
|
1370 |
while (!list_empty(list)) {
|
1371 |
prev = list_entry(list->next, td_vbd_request_t, next); |
1372 |
list_del(&prev->next); |
1373 |
|
1374 |
tapdisk_vbd_for_each_request(vreq, next, list) { |
1375 |
if (vreq->token == prev->token) {
|
1376 |
|
1377 |
prev->cb(prev, prev->error, prev->token, 0);
|
1378 |
vbd->returned++; |
1379 |
|
1380 |
list_del(&vreq->next); |
1381 |
prev = vreq; |
1382 |
} |
1383 |
} |
1384 |
|
1385 |
prev->cb(prev, prev->error, prev->token, 1);
|
1386 |
vbd->returned++; |
1387 |
} |
1388 |
} |
1389 |
|
1390 |
void
|
1391 |
tapdisk_vbd_stats(td_vbd_t *vbd, td_stats_t *st) |
1392 |
{ |
1393 |
td_image_t *image, *next; |
1394 |
|
1395 |
tapdisk_stats_enter(st, '{');
|
1396 |
tapdisk_stats_field(st, "name", "s", vbd->name); |
1397 |
|
1398 |
tapdisk_stats_field(st, "secs", "["); |
1399 |
tapdisk_stats_val(st, "llu", vbd->secs.rd);
|
1400 |
tapdisk_stats_val(st, "llu", vbd->secs.wr);
|
1401 |
tapdisk_stats_leave(st, ']');
|
1402 |
|
1403 |
tapdisk_stats_field(st, "images", "["); |
1404 |
tapdisk_vbd_for_each_image(vbd, image, next) |
1405 |
tapdisk_image_stats(image, st); |
1406 |
tapdisk_stats_leave(st, ']');
|
1407 |
|
1408 |
if (vbd->tap) {
|
1409 |
tapdisk_stats_field(st, "tap", "{"); |
1410 |
tapdisk_blktap_stats(vbd->tap, st); |
1411 |
tapdisk_stats_leave(st, '}');
|
1412 |
} |
1413 |
|
1414 |
tapdisk_stats_field(st, |
1415 |
"FIXME_enospc_redirect_count",
|
1416 |
"llu", vbd->FIXME_enospc_redirect_count);
|
1417 |
|
1418 |
tapdisk_stats_leave(st, '}');
|
1419 |
} |