Revision d9d33417

b/Makefile.objs
14 14

  
15 15
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
16 16
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
17
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
17
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
18 18
block-nested-$(CONFIG_WIN32) += raw-win32.o
19 19
block-nested-$(CONFIG_POSIX) += raw-posix.o
20 20
block-nested-$(CONFIG_CURL) += curl.o
b/block/blkverify.c
1
/*
2
 * Block protocol for block driver correctness testing
3
 *
4
 * Copyright (C) 2010 IBM, Corp.
5
 *
6
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7
 * See the COPYING file in the top-level directory.
8
 */
9

  
10
#include <inttypes.h>
#include <stdarg.h>
#include "qemu_socket.h" /* for EINPROGRESS on Windows */
#include "block_int.h"
13

  
14
typedef struct {
15
    BlockDriverState *test_file;
16
} BDRVBlkverifyState;
17

  
18
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
19
struct BlkverifyAIOCB {
20
    BlockDriverAIOCB common;
21
    QEMUBH *bh;
22

  
23
    /* Request metadata */
24
    bool is_write;
25
    int64_t sector_num;
26
    int nb_sectors;
27

  
28
    int ret;                    /* first completed request's result */
29
    unsigned int done;          /* completion counter */
30
    bool *finished;             /* completion signal for cancel */
31

  
32
    QEMUIOVector *qiov;         /* user I/O vector */
33
    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
34
    void *buf;                  /* buffer for raw file I/O */
35

  
36
    void (*verify)(BlkverifyAIOCB *acb);
37
};
38

  
39
/*
 * Cancel hook for the blkverify AIO pool.
 *
 * The request is not aborted.  Instead this blocks in qemu_aio_wait() until
 * the completion path sets the flag published through acb->finished.
 */
static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
{
    BlkverifyAIOCB *req = (BlkverifyAIOCB *)blockacb;
    bool completed = false;

    /* The completion bottom half writes through this pointer */
    req->finished = &completed;
    do {
        qemu_aio_wait();
    } while (!completed);
}
50

  
51
static AIOPool blkverify_aio_pool = {
52
    .aiocb_size         = sizeof(BlkverifyAIOCB),
53
    .cancel             = blkverify_aio_cancel,
54
};
55

  
56
/*
 * Report a verification failure and terminate the process.
 *
 * Prints "blkverify: <read|write> sector_num=... nb_sectors=... " followed by
 * the caller's printf-style message and a newline to stderr, then calls
 * exit(1).  This function does not return.
 *
 * @acb:        request the failure belongs to
 * @fmt:        printf-style format for the failure description
 */
static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    /*
     * sector_num is an int64_t, so it must be printed with PRId64; "%ld"
     * is a mismatched specifier wherever long is narrower than 64 bits.
     */
    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
            acb->is_write ? "write" : "read", acb->sector_num,
            acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
    exit(1);
}
69

  
70
/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
71
static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
72
{
73
    BDRVBlkverifyState *s = bs->opaque;
74
    int ret;
75
    char *raw, *c;
76

  
77
    /* Parse the blkverify: prefix */
78
    if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
79
        return -EINVAL;
80
    }
81
    filename += strlen("blkverify:");
82

  
83
    /* Parse the raw image filename */
84
    c = strchr(filename, ':');
85
    if (c == NULL) {
86
        return -EINVAL;
87
    }
88

  
89
    raw = strdup(filename);
90
    raw[c - filename] = '\0';
91
    ret = bdrv_file_open(&bs->file, raw, flags);
92
    free(raw);
93
    if (ret < 0) {
94
        return ret;
95
    }
96
    filename = c + 1;
97

  
98
    /* Open the test file */
99
    s->test_file = bdrv_new("");
100
    ret = bdrv_open(s->test_file, filename, flags, NULL);
101
    if (ret < 0) {
102
        bdrv_delete(s->test_file);
103
        s->test_file = NULL;
104
        return ret;
105
    }
106

  
107
    return 0;
108
}
109

  
110
/* Release the image under test and clear the stale pointer to it. */
static void blkverify_close(BlockDriverState *bs)
{
    BDRVBlkverifyState *state = bs->opaque;

    bdrv_delete(state->test_file);
    state->test_file = NULL;
}
117

  
118
/*
 * Synchronous flush.  The raw file is deliberately not flushed: it is not
 * considered important, so only the image under test is flushed.
 */
static void blkverify_flush(BlockDriverState *bs)
{
    BDRVBlkverifyState *state = bs->opaque;
    BlockDriverState *test = state->test_file;

    bdrv_flush(test);
}
125

  
126
/* The device length is whatever bdrv_getlength() reports for the test file. */
static int64_t blkverify_getlength(BlockDriverState *bs)
{
    BDRVBlkverifyState *state = bs->opaque;
    int64_t length = bdrv_getlength(state->test_file);

    return length;
}
132

  
133
/**
134
 * Check that I/O vector contents are identical
135
 *
136
 * @a:          I/O vector
137
 * @b:          I/O vector
138
 * @ret:        Offset to first mismatching byte or -1 if match
139
 */
140
static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
141
{
142
    int i;
143
    ssize_t offset = 0;
144

  
145
    assert(a->niov == b->niov);
146
    for (i = 0; i < a->niov; i++) {
147
        size_t len = 0;
148
        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
149
        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
150

  
151
        assert(a->iov[i].iov_len == b->iov[i].iov_len);
152
        while (len < a->iov[i].iov_len && *p++ == *q++) {
153
            len++;
154
        }
155

  
156
        offset += len;
157

  
158
        if (len != a->iov[i].iov_len) {
159
            return offset;
160
        }
161
    }
162
    return -1;
163
}
164

  
165
/* Scratch record used while cloning an I/O vector */
typedef struct {
    int src_index;              /* position of the element in the source vector */
    struct iovec *src_iov;      /* the source element itself */
    void *dest_base;            /* base address assigned in the clone buffer */
} IOVectorSortElem;
170

  
171
static int sortelem_cmp_src_base(const void *a, const void *b)
172
{
173
    const IOVectorSortElem *elem_a = a;
174
    const IOVectorSortElem *elem_b = b;
175

  
176
    /* Don't overflow */
177
    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
178
        return -1;
179
    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
180
        return 1;
181
    } else {
182
        return 0;
183
    }
184
}
185

  
186
static int sortelem_cmp_src_index(const void *a, const void *b)
187
{
188
    const IOVectorSortElem *elem_a = a;
189
    const IOVectorSortElem *elem_b = b;
190

  
191
    return elem_a->src_index - elem_b->src_index;
192
}
193

  
194
/**
195
 * Copy contents of I/O vector
196
 *
197
 * The relative relationships of overlapping iovecs are preserved.  This is
198
 * necessary to ensure identical semantics in the cloned I/O vector.
199
 */
200
static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
201
                                  void *buf)
202
{
203
    IOVectorSortElem sortelems[src->niov];
204
    void *last_end;
205
    int i;
206

  
207
    /* Sort by source iovecs by base address */
208
    for (i = 0; i < src->niov; i++) {
209
        sortelems[i].src_index = i;
210
        sortelems[i].src_iov = &src->iov[i];
211
    }
212
    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
213

  
214
    /* Allocate buffer space taking into account overlapping iovecs */
215
    last_end = NULL;
216
    for (i = 0; i < src->niov; i++) {
217
        struct iovec *cur = sortelems[i].src_iov;
218
        ptrdiff_t rewind = 0;
219

  
220
        /* Detect overlap */
221
        if (last_end && last_end > cur->iov_base) {
222
            rewind = last_end - cur->iov_base;
223
        }
224

  
225
        sortelems[i].dest_base = buf - rewind;
226
        buf += cur->iov_len - MIN(rewind, cur->iov_len);
227
        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
228
    }
229

  
230
    /* Sort by source iovec index and build destination iovec */
231
    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
232
    for (i = 0; i < src->niov; i++) {
233
        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
234
    }
235
}
236

  
237
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
238
                                         int64_t sector_num, QEMUIOVector *qiov,
239
                                         int nb_sectors,
240
                                         BlockDriverCompletionFunc *cb,
241
                                         void *opaque)
242
{
243
    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
244

  
245
    acb->bh = NULL;
246
    acb->is_write = is_write;
247
    acb->sector_num = sector_num;
248
    acb->nb_sectors = nb_sectors;
249
    acb->ret = -EINPROGRESS;
250
    acb->done = 0;
251
    acb->qiov = qiov;
252
    acb->buf = NULL;
253
    acb->verify = NULL;
254
    acb->finished = NULL;
255
    return acb;
256
}
257

  
258
static void blkverify_aio_bh(void *opaque)
259
{
260
    BlkverifyAIOCB *acb = opaque;
261

  
262
    qemu_bh_delete(acb->bh);
263
    if (acb->buf) {
264
        qemu_iovec_destroy(&acb->raw_qiov);
265
        qemu_vfree(acb->buf);
266
    }
267
    acb->common.cb(acb->common.opaque, acb->ret);
268
    if (acb->finished) {
269
        *acb->finished = true;
270
    }
271
    qemu_aio_release(acb);
272
}
273

  
274
static void blkverify_aio_cb(void *opaque, int ret)
275
{
276
    BlkverifyAIOCB *acb = opaque;
277

  
278
    switch (++acb->done) {
279
    case 1:
280
        acb->ret = ret;
281
        break;
282

  
283
    case 2:
284
        if (acb->ret != ret) {
285
            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
286
        }
287

  
288
        if (acb->verify) {
289
            acb->verify(acb);
290
        }
291

  
292
        acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
293
        qemu_bh_schedule(acb->bh);
294
        break;
295
    }
296
}
297

  
298
/*
 * Verify hook for reads: compare the data read from the test file
 * (acb->qiov) with the data read from the raw file (acb->raw_qiov) and
 * report the first sector that differs through blkverify_err().
 */
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
    ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
    if (offset != -1) {
        /*
         * The sector number is 64 bits wide, so print it with PRId64;
         * "%ld" is a mismatched specifier wherever long is 32 bits.
         */
        blkverify_err(acb, "contents mismatch in sector %" PRId64,
                      (int64_t)(acb->sector_num +
                                (offset / BDRV_SECTOR_SIZE)));
    }
}
306

  
307
static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
308
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
309
        BlockDriverCompletionFunc *cb, void *opaque)
310
{
311
    BDRVBlkverifyState *s = bs->opaque;
312
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
313
                                            nb_sectors, cb, opaque);
314

  
315
    acb->verify = blkverify_verify_readv;
316
    acb->buf = qemu_blockalign(bs->file, qiov->size);
317
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
318
    blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
319

  
320
    if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
321
                        blkverify_aio_cb, acb)) {
322
        blkverify_aio_cb(acb, -EIO);
323
    }
324
    if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
325
                        blkverify_aio_cb, acb)) {
326
        blkverify_aio_cb(acb, -EIO);
327
    }
328
    return &acb->common;
329
}
330

  
331
static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
332
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
333
        BlockDriverCompletionFunc *cb, void *opaque)
334
{
335
    BDRVBlkverifyState *s = bs->opaque;
336
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
337
                                            nb_sectors, cb, opaque);
338

  
339
    if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
340
                         blkverify_aio_cb, acb)) {
341
        blkverify_aio_cb(acb, -EIO);
342
    }
343
    if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
344
                         blkverify_aio_cb, acb)) {
345
        blkverify_aio_cb(acb, -EIO);
346
    }
347
    return &acb->common;
348
}
349

  
350
static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
351
                                             BlockDriverCompletionFunc *cb,
352
                                             void *opaque)
353
{
354
    BDRVBlkverifyState *s = bs->opaque;
355

  
356
    /* Only flush test file, the raw file is not important */
357
    return bdrv_aio_flush(s->test_file, cb, opaque);
358
}
359

  
360
static BlockDriver bdrv_blkverify = {
361
    .format_name        = "blkverify",
362
    .protocol_name      = "blkverify",
363

  
364
    .instance_size      = sizeof(BDRVBlkverifyState),
365

  
366
    .bdrv_getlength     = blkverify_getlength,
367

  
368
    .bdrv_file_open     = blkverify_open,
369
    .bdrv_close         = blkverify_close,
370
    .bdrv_flush         = blkverify_flush,
371

  
372
    .bdrv_aio_readv     = blkverify_aio_readv,
373
    .bdrv_aio_writev    = blkverify_aio_writev,
374
    .bdrv_aio_flush     = blkverify_aio_flush,
375
};
376

  
377
static void bdrv_blkverify_init(void)
378
{
379
    bdrv_register(&bdrv_blkverify);
380
}
381

  
382
block_init(bdrv_blkverify_init);
b/docs/blkverify.txt
1
= Block driver correctness testing with blkverify =
2

  
3
== Introduction ==
4

  
5
This document describes how to use the blkverify protocol to test that a block
6
driver is operating correctly.
7

  
8
It is difficult to test and debug block drivers against real guests.  Often
9
processes inside the guest will crash because corrupt sectors were read as part
10
of the executable.  Other times obscure errors are raised by a program inside
11
the guest.  These issues are extremely hard to trace back to bugs in the block
12
driver.
13

  
14
Blkverify solves this problem by catching data corruption inside QEMU the first
15
time bad data is read and reporting the disk sector that is corrupted.
16

  
17
== How it works ==
18

  
19
The blkverify protocol has two child block devices, the "test" device and the
20
"raw" device.  Read/write operations are mirrored to both devices so their
21
state should always be in sync.
22

  
23
The "raw" device is a raw image, a flat file, that has identical starting
24
contents to the "test" image.  The idea is that the "raw" device will handle
25
read/write operations correctly and not corrupt data.  It can be used as a
26
reference for comparison against the "test" device.
27

  
28
After a mirrored read operation completes, blkverify will compare the data and
29
raise an error if it is not identical.  This makes it possible to catch the
30
first instance where corrupt data is read.
31

  
32
== Example ==
33

  
34
Imagine raw.img has 0xcd repeated throughout its first sector:
35

  
36
    $ ./qemu-io -c 'read -v 0 512' raw.img
37
    00000000:  cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd  ................
38
    00000010:  cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd  ................
39
    [...]
40
    000001e0:  cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd  ................
41
    000001f0:  cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd  ................
42
    read 512/512 bytes at offset 0
43
    512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec)
44

  
45
But test.img is corrupt; its first sector is zeroed when it should not be:
46

  
47
    $ ./qemu-io -c 'read -v 0 512' test.img
48
    00000000:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
49
    00000010:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
50
    [...]
51
    000001e0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
52
    000001f0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
53
    read 512/512 bytes at offset 0
54
    512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec)
55

  
56
This error is caught by blkverify:
57

  
58
    $ ./qemu-io -c 'read 0 512' blkverify:raw.img:test.img
59
    blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
60

  
61
A more realistic scenario is verifying the installation of a guest OS:
62

  
63
    $ ./qemu-img create raw.img 16G
64
    $ ./qemu-img create -f qcow2 test.qcow2 16G
65
    $ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
66
                                        -drive file=blkverify:raw.img:test.qcow2
67

  
68
If the installation is aborted when blkverify detects corruption, use qemu-io
69
to explore the contents of the disk image at the sector in question.

Also available in: Unified diff