/*
 * QEMU Enhanced Disk Format
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "trace.h"
#include "qed.h"

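/**
 * Cancel an in-flight request by waiting synchronously until it completes
 */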
static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
{
    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
    bool finished = false;

    /* Wait for the request to finish */
    acb->finished = &finished;
    while (!finished) {
        qemu_aio_wait();
    }
}

static AIOPool qed_aio_pool = {
    .aiocb_size         = sizeof(QEDAIOCB),
    .cancel             = qed_aio_cancel,
};

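/**
 * Probe: return a score of 100 if the buffer begins with the QED magic, else 0
 */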
static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
                          const char *filename)
{
    const QEDHeader *header = (const QEDHeader *)buf;

    if (buf_size < sizeof(*header)) {
        return 0;
    }
    if (le32_to_cpu(header->magic) != QED_MAGIC) {
        return 0;
    }
    return 100;
}

/**
 * Check whether an image format is raw
 *
 * @fmt:    Backing file format, may be NULL
 */
static bool qed_fmt_is_raw(const char *fmt)
{
    return fmt && strcmp(fmt, "raw") == 0;
}

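/**
 * Convert an on-disk (little-endian) header to CPU byte order
 */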
static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
{
    cpu->magic = le32_to_cpu(le->magic);
    cpu->cluster_size = le32_to_cpu(le->cluster_size);
    cpu->table_size = le32_to_cpu(le->table_size);
    cpu->header_size = le32_to_cpu(le->header_size);
    cpu->features = le64_to_cpu(le->features);
    cpu->compat_features = le64_to_cpu(le->compat_features);
    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
    cpu->image_size = le64_to_cpu(le->image_size);
    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
}

static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
{
    le->magic = cpu_to_le32(cpu->magic);
    le->cluster_size = cpu_to_le32(cpu->cluster_size);
    le->table_size = cpu_to_le32(cpu->table_size);
    le->header_size = cpu_to_le32(cpu->header_size);
    le->features = cpu_to_le64(cpu->features);
    le->compat_features = cpu_to_le64(cpu->compat_features);
    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
    le->image_size = cpu_to_le64(cpu->image_size);
    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
}

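/**
 * Write the header to the image file synchronously
 *
 * Returns 0 on success, -errno on failure.
 */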
static int qed_write_header_sync(BDRVQEDState *s)
{
    QEDHeader le;
    int ret;

    qed_header_cpu_to_le(&s->header, &le);
    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
    if (ret != sizeof(le)) {
        return ret;
    }
    return 0;
}

typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    struct iovec iov;
    QEMUIOVector qiov;
    int nsectors;
    uint8_t *buf;
} QEDWriteHeaderCB;

static void qed_write_header_cb(void *opaque, int ret)
{
    QEDWriteHeaderCB *write_header_cb = opaque;

    qemu_vfree(write_header_cb->buf);
    gencb_complete(write_header_cb, ret);
}

static void qed_write_header_read_cb(void *opaque, int ret)
{
    QEDWriteHeaderCB *write_header_cb = opaque;
    BDRVQEDState *s = write_header_cb->s;
    BlockDriverAIOCB *acb;

    if (ret) {
        qed_write_header_cb(write_header_cb, ret);
        return;
    }

    /* Update header */
    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);

    acb = bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
                          write_header_cb->nsectors, qed_write_header_cb,
                          write_header_cb);
    if (!acb) {
        qed_write_header_cb(write_header_cb, -EIO);
    }
}

/**
 * Update header in-place (does not rewrite backing filename or other strings)
 *
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
                             void *opaque)
{
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
     * the data following the header if an unrecognized compat feature is
     * active.  Therefore, first read the sectors containing the header, update
     * them, and write back.
     */

    BlockDriverAIOCB *acb;
    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
                   BDRV_SECTOR_SIZE;
    size_t len = nsectors * BDRV_SECTOR_SIZE;
    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
                                                    cb, opaque);

    write_header_cb->s = s;
    write_header_cb->nsectors = nsectors;
    write_header_cb->buf = qemu_blockalign(s->bs, len);
    write_header_cb->iov.iov_base = write_header_cb->buf;
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

    acb = bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
                         qed_write_header_read_cb, write_header_cb);
    if (!acb) {
        qed_write_header_cb(write_header_cb, -EIO);
    }
}

static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
{
    uint64_t table_entries;
    uint64_t l2_size;

    table_entries = (table_size * cluster_size) / sizeof(uint64_t);
    l2_size = table_entries * cluster_size;

    return l2_size * table_entries;
}

static bool qed_is_cluster_size_valid(uint32_t cluster_size)
{
    if (cluster_size < QED_MIN_CLUSTER_SIZE ||
        cluster_size > QED_MAX_CLUSTER_SIZE) {
        return false;
    }
    if (cluster_size & (cluster_size - 1)) {
        return false; /* not power of 2 */
    }
    return true;
}

static bool qed_is_table_size_valid(uint32_t table_size)
{
    if (table_size < QED_MIN_TABLE_SIZE ||
        table_size > QED_MAX_TABLE_SIZE) {
        return false;
    }
    if (table_size & (table_size - 1)) {
        return false; /* not power of 2 */
    }
    return true;
}

static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
                                    uint32_t table_size)
{
    if (image_size % BDRV_SECTOR_SIZE != 0) {
        return false; /* not multiple of sector size */
    }
    if (image_size > qed_max_image_size(cluster_size, table_size)) {
        return false; /* image is too large */
    }
    return true;
}

/**
 * Read a string of known length from the image file
 *
 * @file:       Image file
 * @offset:     File offset to start of string, in bytes
 * @n:          String length in bytes
 * @buf:        Destination buffer
 * @buflen:     Destination buffer length in bytes
 * @ret:        0 on success, -errno on failure
 *
 * The string is NUL-terminated.
 */
static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
                           char *buf, size_t buflen)
{
    int ret;
    if (n >= buflen) {
        return -EINVAL;
    }
    ret = bdrv_pread(file, offset, buf, n);
    if (ret < 0) {
        return ret;
    }
    buf[n] = '\0';
    return 0;
}

/**
 * Allocate new clusters
 *
 * @s:          QED state
 * @n:          Number of contiguous clusters to allocate
 * @ret:        Offset of first allocated cluster
 *
 * This function only produces the offset where the new clusters should be
 * written.  It updates BDRVQEDState but does not make any changes to the image
 * file.
 */
static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
{
    uint64_t offset = s->file_size;
    s->file_size += n * s->header.cluster_size;
    return offset;
}

QEDTable *qed_alloc_table(BDRVQEDState *s)
{
    /* Honor O_DIRECT memory alignment requirements */
    return qemu_blockalign(s->bs,
                           s->header.cluster_size * s->header.table_size);
}

/**
 * Allocate a new zeroed L2 table
 */
static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
{
    CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);

    l2_table->table = qed_alloc_table(s);
    l2_table->offset = qed_alloc_clusters(s, s->header.table_size);

    memset(l2_table->table->offsets, 0,
           s->header.cluster_size * s->header.table_size);
    return l2_table;
}

static void qed_aio_next_io(void *opaque, int ret);

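/**
 * Open an existing image: validate the header, load the L1 table, and run a
 * consistency check if the image was not closed cleanly
 */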
static int bdrv_qed_open(BlockDriverState *bs, int flags)
{
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
    int64_t file_size;
    int ret;

    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
    ret = 0; /* ret should always be 0 or -errno */
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
        return -EINVAL;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        return -ENOTSUP; /* image uses unsupported feature bits */
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
        return -EINVAL;
    }

    /* Round down file size to the last cluster */
    file_size = bdrv_getlength(bs->file);
    if (file_size < 0) {
        return file_size;
    }
    s->file_size = qed_start_of_cluster(s, file_size);

    if (!qed_is_table_size_valid(s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(s->header.image_size,
                                 s->header.cluster_size,
                                 s->header.table_size)) {
        return -EINVAL;
    }
    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
        return -EINVAL;
    }

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
            s->header.backing_filename_size >
            s->header.cluster_size * s->header.header_size) {
            return -EINVAL;
        }

        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
            return ret;
        }

        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
        }
    }

    /* Reset unknown autoclear feature bits.  This is a backwards
     * compatibility mechanism that allows images to be opened by older
     * programs, which "knock out" unknown feature bits.  When an image is
     * opened by a newer program again it can detect that the autoclear
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
        !bdrv_is_read_only(bs->file)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
        if (ret) {
            return ret;
        }

        /* From here on only known autoclear feature bits are valid */
        bdrv_flush(bs->file);
    }

    s->l1_table = qed_alloc_table(s);
    qed_init_l2_cache(&s->l2_cache);

    ret = qed_read_l1_table_sync(s);
    if (ret) {
        goto out;
    }

    /* If image was not closed cleanly, check consistency */
    if (s->header.features & QED_F_NEED_CHECK) {
        /* Read-only images cannot be fixed.  There is no risk of corruption
         * since write operations are not possible.  Therefore, allow
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
        if (!bdrv_is_read_only(bs->file)) {
            BdrvCheckResult result = {0};

            ret = qed_check(s, &result, true);
            if (!ret && !result.corruptions && !result.check_errors) {
                /* Ensure fixes reach storage before clearing check bit */
                bdrv_flush(s->bs);

                s->header.features &= ~QED_F_NEED_CHECK;
                qed_write_header_sync(s);
            }
        }
    }

out:
    if (ret) {
        qed_free_l2_cache(&s->l2_cache);
        qemu_vfree(s->l1_table);
    }
    return ret;
}

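/**
 * Close the image: flush, clear the need-check flag on clean shutdown, and
 * free the in-memory tables
 */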
static void bdrv_qed_close(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;

    /* Ensure writes reach stable storage */
    bdrv_flush(bs->file);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
        s->header.features &= ~QED_F_NEED_CHECK;
        qed_write_header_sync(s);
    }

    qed_free_l2_cache(&s->l2_cache);
    qemu_vfree(s->l1_table);
}

static int bdrv_qed_flush(BlockDriverState *bs)
{
    return bdrv_flush(bs->file);
}

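/**
 * Create a new image file on disk with the given geometry and backing file
 */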
static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
                      const char *backing_file, const char *backing_fmt)
{
    QEDHeader header = {
        .magic = QED_MAGIC,
        .cluster_size = cluster_size,
        .table_size = table_size,
        .header_size = 1,
        .features = 0,
        .compat_features = 0,
        .l1_table_offset = cluster_size,
        .image_size = image_size,
    };
    QEDHeader le_header;
    uint8_t *l1_table = NULL;
    size_t l1_size = header.cluster_size * header.table_size;
    int ret = 0;
    BlockDriverState *bs = NULL;

    ret = bdrv_create_file(filename, NULL);
    if (ret < 0) {
        return ret;
    }

    ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB);
    if (ret < 0) {
        return ret;
    }

    /* File must start empty and grow, check truncate is supported */
    ret = bdrv_truncate(bs, 0);
    if (ret < 0) {
        goto out;
    }

    if (backing_file) {
        header.features |= QED_F_BACKING_FILE;
        header.backing_filename_offset = sizeof(le_header);
        header.backing_filename_size = strlen(backing_file);

        if (qed_fmt_is_raw(backing_fmt)) {
            header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
        }
    }

    qed_header_cpu_to_le(&header, &le_header);
    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        goto out;
    }
    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
                      header.backing_filename_size);
    if (ret < 0) {
        goto out;
    }

    l1_table = qemu_mallocz(l1_size);
    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
    if (ret < 0) {
        goto out;
    }

    ret = 0; /* success */
out:
    qemu_free(l1_table);
    bdrv_delete(bs);
    return ret;
}

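/**
 * Parse creation options, validate them, and create the image via qed_create()
 */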
static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
{
    uint64_t image_size = 0;
    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
    const char *backing_file = NULL;
    const char *backing_fmt = NULL;

    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            image_size = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
            backing_fmt = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
            if (options->value.n) {
                cluster_size = options->value.n;
            }
        } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
            if (options->value.n) {
                table_size = options->value.n;
            }
        }
        options++;
    }

    if (!qed_is_cluster_size_valid(cluster_size)) {
        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
        return -EINVAL;
    }
    if (!qed_is_table_size_valid(table_size)) {
        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
        return -EINVAL;
    }
    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
        fprintf(stderr, "QED image size must be a non-zero multiple of "
                        "cluster size and less than %" PRIu64 " bytes\n",
                qed_max_image_size(cluster_size, table_size));
        return -EINVAL;
    }

    return qed_create(filename, cluster_size, image_size, table_size,
                      backing_file, backing_fmt);
}

typedef struct {
    int is_allocated;
    int *pnum;
} QEDIsAllocatedCB;

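/**
 * Callback from qed_find_cluster() for bdrv_qed_is_allocated(): record whether
 * the cluster is allocated and how many contiguous sectors it covers
 */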
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
    QEDIsAllocatedCB *cb = opaque;
    *cb->pnum = len / BDRV_SECTOR_SIZE;
    cb->is_allocated = ret == QED_CLUSTER_FOUND;
}

static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                  int nb_sectors, int *pnum)
{
    BDRVQEDState *s = bs->opaque;
    uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
    QEDIsAllocatedCB cb = {
        .is_allocated = -1,
        .pnum = pnum,
    };
    QEDRequest request = { .l2_table = NULL };

    async_context_push();

    qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);

    while (cb.is_allocated == -1) {
        qemu_aio_wait();
    }

    async_context_pop();

    qed_unref_l2_cache_entry(request.l2_table);

    return cb.is_allocated;
}

static int bdrv_qed_make_empty(BlockDriverState *bs)
{
    return -ENOTSUP;
}

static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
{
    return acb->common.bs->opaque;
}

/**
 * Read from the backing file or zero-fill if no backing file
 *
 * @s:          QED state
 * @pos:        Byte position in device
 * @qiov:       Destination I/O vector
 * @cb:         Completion function
 * @opaque:     User data for completion function
 *
 * This function reads qiov->size bytes starting at pos from the backing file.
 * If there is no backing file then zeroes are read.
 */
static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *aiocb;
    uint64_t backing_length = 0;
    size_t size;

    /* If there is a backing file, get its length.  Treat the absence of a
     * backing file like a zero length backing file.
     */
    if (s->bs->backing_hd) {
        int64_t l = bdrv_getlength(s->bs->backing_hd);
        if (l < 0) {
            cb(opaque, l);
            return;
        }
        backing_length = l;
    }

    /* Zero all sectors if reading beyond the end of the backing file */
    if (pos >= backing_length ||
        pos + qiov->size > backing_length) {
        qemu_iovec_memset(qiov, 0, qiov->size);
    }

    /* Complete now if there are no backing file sectors to read */
    if (pos >= backing_length) {
        cb(opaque, 0);
        return;
    }

    /* If the read straddles the end of the backing file, shorten it */
    size = MIN((uint64_t)backing_length - pos, qiov->size);

    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING);
    aiocb = bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
                           qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
    if (!aiocb) {
        cb(opaque, -EIO);
    }
}

typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    QEMUIOVector qiov;
    struct iovec iov;
    uint64_t offset;
} CopyFromBackingFileCB;

static void qed_copy_from_backing_file_cb(void *opaque, int ret)
{
    CopyFromBackingFileCB *copy_cb = opaque;
    qemu_vfree(copy_cb->iov.iov_base);
    gencb_complete(&copy_cb->gencb, ret);
}

static void qed_copy_from_backing_file_write(void *opaque, int ret)
{
    CopyFromBackingFileCB *copy_cb = opaque;
    BDRVQEDState *s = copy_cb->s;
    BlockDriverAIOCB *aiocb;

    if (ret) {
        qed_copy_from_backing_file_cb(copy_cb, ret);
        return;
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
    aiocb = bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
                            &copy_cb->qiov,
                            copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                            qed_copy_from_backing_file_cb, copy_cb);
    if (!aiocb) {
        qed_copy_from_backing_file_cb(copy_cb, -EIO);
    }
}

/**
 * Copy data from backing file into the image
 *
 * @s:          QED state
 * @pos:        Byte position in device
 * @len:        Number of bytes
 * @offset:     Byte offset in image file
 * @cb:         Completion function
 * @opaque:     User data for completion function
 */
static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
                                       uint64_t len, uint64_t offset,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque)
{
    CopyFromBackingFileCB *copy_cb;

    /* Skip copy entirely if there is no work to do */
    if (len == 0) {
        cb(opaque, 0);
        return;
    }

    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
    copy_cb->s = s;
    copy_cb->offset = offset;
    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
    copy_cb->iov.iov_len = len;
    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);

    qed_read_backing_file(s, pos, &copy_cb->qiov,
                          qed_copy_from_backing_file_write, copy_cb);
}

/**
 * Link one or more contiguous clusters into a table
 *
 * @s:              QED state
 * @table:          L2 table
 * @index:          First cluster index
 * @n:              Number of contiguous clusters
 * @cluster:        First cluster byte offset in image file
 */
static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
                                unsigned int n, uint64_t cluster)
{
    int i;
    for (i = index; i < index + n; i++) {
        table->offsets[i] = cluster;
        cluster += s->header.cluster_size;
    }
}

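/**
 * Bottom half scheduled by qed_aio_complete() to release the AIOCB and invoke
 * the user's completion callback
 */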
static void qed_aio_complete_bh(void *opaque)
{
    QEDAIOCB *acb = opaque;
    BlockDriverCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;
    bool *finished = acb->finished;

    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);

    /* Invoke callback */
    cb(user_opaque, ret);

    /* Signal cancel completion */
    if (finished) {
        *finished = true;
    }
}

static void qed_aio_complete(QEDAIOCB *acb, int ret)
{
    BDRVQEDState *s = acb_to_s(acb);

    trace_qed_aio_complete(s, acb, ret);

    /* Free resources */
    qemu_iovec_destroy(&acb->cur_qiov);
    qed_unref_l2_cache_entry(acb->request.l2_table);

    /* Arrange for a bh to invoke the completion function */
    acb->bh_ret = ret;
    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
    qemu_bh_schedule(acb->bh);

    /* Start next allocating write request waiting behind this one.  Note that
     * requests enqueue themselves when they first hit an unallocated cluster
     * but they wait until the entire request is finished before waking up the
     * next request in the queue.  This ensures that we don't cycle through
     * requests multiple times but rather finish one at a time completely.
     */
    if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
        if (acb) {
            qed_aio_next_io(acb, 0);
        }
    }
}

/**
 * Commit the current L2 table to the cache
 */
static void qed_commit_l2_update(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    CachedL2Table *l2_table = acb->request.l2_table;

    qed_commit_l2_cache_entry(&s->l2_cache, l2_table);

    /* This is guaranteed to succeed because we just committed the entry to the
     * cache.
     */
    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache,
                                                    l2_table->offset);
    assert(acb->request.l2_table != NULL);

    qed_aio_next_io(opaque, ret);
}

/**
 * Update L1 table with new L2 table offset and write it out
 */
static void qed_aio_write_l1_update(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    int index;

    if (ret) {
        qed_aio_complete(acb, ret);
        return;
    }

    index = qed_l1_index(s, acb->cur_pos);
    s->l1_table->offsets[index] = acb->request.l2_table->offset;

    qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
}

/**
 * Update L2 table with new cluster offsets and write them out
 */
static void qed_aio_write_l2_update(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
    int index;

    if (ret) {
        goto err;
    }

    if (need_alloc) {
        qed_unref_l2_cache_entry(acb->request.l2_table);
        acb->request.l2_table = qed_new_l2_table(s);
    }

    index = qed_l2_index(s, acb->cur_pos);
    qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
                         acb->cur_cluster);

    if (need_alloc) {
        /* Write out the whole new L2 table */
        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
                            qed_aio_write_l1_update, acb);
    } else {
        /* Write out only the updated part of the L2 table */
        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
                            qed_aio_next_io, acb);
    }
    return;

err:
    qed_aio_complete(acb, ret);
}

/**
 * Flush new data clusters before updating the L2 table
 *
 * This flush is necessary when a backing file is in use.  A crash during an
 * allocating write could result in empty clusters in the image.  If the write
 * only touched a subregion of the cluster, then backing image sectors have
 * been lost in the untouched region.  The solution is to flush after writing a
 * new data cluster and before updating the L2 table.
 */
static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);

    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
        qed_aio_complete(acb, -EIO);
    }
}

/**
 * Write data to the image file
 */
static void qed_aio_write_main(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos);
    BlockDriverCompletionFunc *next_fn;
    BlockDriverAIOCB *file_acb;

    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);

    if (ret) {
        qed_aio_complete(acb, ret);
        return;
    }

    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
        next_fn = qed_aio_next_io;
    } else {
        if (s->bs->backing_hd) {
            next_fn = qed_aio_write_flush_before_l2_update;
        } else {
            next_fn = qed_aio_write_l2_update;
        }
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
    file_acb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                               &acb->cur_qiov,
                               acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                               next_fn, acb);
    if (!file_acb) {
        qed_aio_complete(acb, -EIO);
    }
}

/**
 * Populate back untouched region of new data cluster
 */
static void qed_aio_write_postfill(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t start = acb->cur_pos + acb->cur_qiov.size;
    uint64_t len =
        qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos) +
                      acb->cur_qiov.size;

    if (ret) {
        qed_aio_complete(acb, ret);
        return;
    }

    trace_qed_aio_write_postfill(s, acb, start, len, offset);
    qed_copy_from_backing_file(s, start, len, offset,
                                qed_aio_write_main, acb);
}

/**
 * Populate front untouched region of new data cluster
 */
static void qed_aio_write_prefill(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);

    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
    qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
                                qed_aio_write_postfill, acb);
}

/**
 * Write new data cluster
 *
 * @acb:        Write request
 * @len:        Length in bytes
 *
 * This path is taken when writing to previously unallocated clusters.
 */
static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
    BDRVQEDState *s = acb_to_s(acb);

    /* Freeze this request if another allocating write is in progress */
    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
        QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
    }
    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
        return; /* wait for existing request to finish */
    }

    acb->cur_nclusters = qed_bytes_to_clusters(s,
            qed_offset_into_cluster(s, acb->cur_pos) + len);
    acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

    /* Write new cluster if the image is already marked dirty */
    if (s->header.features & QED_F_NEED_CHECK) {
        qed_aio_write_prefill(acb, 0);
        return;
    }

    /* Mark the image dirty before writing the new cluster */
    s->header.features |= QED_F_NEED_CHECK;
    qed_write_header(s, qed_aio_write_prefill, acb);
}

/**
 * Write data cluster in place
 *
 * @acb:        Write request
 * @offset:     Cluster offset in bytes
 * @len:        Length in bytes
 *
 * This path is taken when writing to already allocated clusters.
 */
static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
{
    /* Calculate the I/O vector */
    acb->cur_cluster = offset;
    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

    /* Do the actual write */
    qed_aio_write_main(acb, 0);
}

/**
 * Write data cluster
 *
 * @opaque:     Write request
 * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
 *              or -errno
 * @offset:     Cluster offset in bytes
 * @len:        Length in bytes
 *
 * Callback from qed_find_cluster().
 */
static void qed_aio_write_data(void *opaque, int ret,
                               uint64_t offset, size_t len)
{
    QEDAIOCB *acb = opaque;

    trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);

    acb->find_cluster_ret = ret;

    switch (ret) {
    case QED_CLUSTER_FOUND:
        qed_aio_write_inplace(acb, offset, len);
        break;

    case QED_CLUSTER_L2:
    case QED_CLUSTER_L1:
        qed_aio_write_alloc(acb, len);
        break;

    default:
        qed_aio_complete(acb, ret);
        break;
    }
}

/**
 * Read data cluster
 *
 * @opaque:     Read request
 * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
 *              or -errno
 * @offset:     Cluster offset in bytes
 * @len:        Length in bytes
 *
 * Callback from qed_find_cluster().
 */
static void qed_aio_read_data(void *opaque, int ret,
                              uint64_t offset, size_t len)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    BlockDriverState *bs = acb->common.bs;
    BlockDriverAIOCB *file_acb;

    /* Adjust offset into cluster */
    offset += qed_offset_into_cluster(s, acb->cur_pos);

    trace_qed_aio_read_data(s, acb, ret, offset, len);

    if (ret < 0) {
        goto err;
    }

    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

    /* Handle backing file and unallocated sparse hole reads */
    if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
                              qed_aio_next_io, acb);
        return;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    file_acb = bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                              &acb->cur_qiov,
                              acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                              qed_aio_next_io, acb);
    if (!file_acb) {
        ret = -EIO;
        goto err;
    }
    return;

err:
    qed_aio_complete(acb, ret);
}

/**
 * Begin next I/O or complete the request
 */
static void qed_aio_next_io(void *opaque, int ret)
{
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    QEDFindClusterFunc *io_fn =
        acb->is_write ? qed_aio_write_data : qed_aio_read_data;

    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);

    /* Handle I/O error */
    if (ret) {
        qed_aio_complete(acb, ret);
        return;
    }

    acb->qiov_offset += acb->cur_qiov.size;
    acb->cur_pos += acb->cur_qiov.size;
    qemu_iovec_reset(&acb->cur_qiov);

    /* Complete request */
    if (acb->cur_pos >= acb->end_pos) {
        qed_aio_complete(acb, 0);
        return;
    }

    /* Find next cluster and start I/O */
    qed_find_cluster(s, &acb->request,
                      acb->cur_pos, acb->end_pos - acb->cur_pos,
                      io_fn, acb);
}

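/**
 * Allocate an AIOCB, initialize the request state, and start the first I/O
 */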
static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
                                       int64_t sector_num,
                                       QEMUIOVector *qiov, int nb_sectors,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque, bool is_write)
{
    QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque);

    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
                         opaque, is_write);

    acb->is_write = is_write;
    acb->finished = NULL;
    acb->qiov = qiov;
    acb->qiov_offset = 0;
    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
    acb->request.l2_table = NULL;
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);

    /* Start request */
    qed_aio_next_io(acb, 0);
    return &acb->common;
}

static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov, int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque)
{
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, false);
}

static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
                                             int64_t sector_num,
                                             QEMUIOVector *qiov, int nb_sectors,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
{
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, true);
}

static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque)
{
    return bdrv_aio_flush(bs->file, cb, opaque);
}

static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
{
    return -ENOTSUP;
}

static int64_t bdrv_qed_getlength(BlockDriverState *bs)
{
    BDRVQEDState *s = bs->opaque;
    return s->header.image_size;
}

static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVQEDState *s = bs->opaque;

    memset(bdi, 0, sizeof(*bdi));
    bdi->cluster_size = s->header.cluster_size;
    return 0;
}

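/**
 * Rewrite the image header with a new backing filename and format hint
 */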
static int bdrv_qed_change_backing_file(BlockDriverState *bs,
                                        const char *backing_file,
                                        const char *backing_fmt)
{
    BDRVQEDState *s = bs->opaque;
    QEDHeader new_header, le_header;
    void *buffer;
    size_t buffer_len, backing_file_len;
    int ret;

    /* Refuse to set backing filename if unknown compat feature bits are
     * active.  If the image uses an unknown compat feature then we may not
     * know the layout of data following the header structure and cannot safely
     * add a new string.
     */
    if (backing_file && (s->header.compat_features &
                         ~QED_COMPAT_FEATURE_MASK)) {
        return -ENOTSUP;
    }

    memcpy(&new_header, &s->header, sizeof(new_header));

    new_header.features &= ~(QED_F_BACKING_FILE |
                             QED_F_BACKING_FORMAT_NO_PROBE);

    /* Adjust feature flags */
    if (backing_file) {
        new_header.features |= QED_F_BACKING_FILE;

        if (qed_fmt_is_raw(backing_fmt)) {
            new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
        }
    }

    /* Calculate new header size */
    backing_file_len = 0;

    if (backing_file) {
        backing_file_len = strlen(backing_file);
    }

    buffer_len = sizeof(new_header);
    new_header.backing_filename_offset = buffer_len;
    new_header.backing_filename_size = backing_file_len;
    buffer_len += backing_file_len;

    /* Make sure we can rewrite header without failing */
    if (buffer_len > new_header.header_size * new_header.cluster_size) {
        return -ENOSPC;
    }

    /* Prepare new header */
    buffer = qemu_malloc(buffer_len);

    qed_header_cpu_to_le(&new_header, &le_header);
    memcpy(buffer, &le_header, sizeof(le_header));
    buffer_len = sizeof(le_header);

    memcpy(buffer + buffer_len, backing_file, backing_file_len);
    buffer_len += backing_file_len;

    /* Write new header */
    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
    qemu_free(buffer);
    if (ret == 0) {
        memcpy(&s->header, &new_header, sizeof(new_header));
    }
    return ret;
}

static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
{
    BDRVQEDState *s = bs->opaque;

    return qed_check(s, result, false);
}

static QEMUOptionParameter qed_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size (in bytes)"
    }, {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    }, {
        .name = BLOCK_OPT_BACKING_FMT,
        .type = OPT_STRING,
        .help = "Image format of the base image"
    }, {
        .name = BLOCK_OPT_CLUSTER_SIZE,
        .type = OPT_SIZE,
        .help = "Cluster size (in bytes)"
    }, {
        .name = BLOCK_OPT_TABLE_SIZE,
        .type = OPT_SIZE,
        .help = "L1/L2 table size (in clusters)"
    },
    { /* end of list */ }
};

static BlockDriver bdrv_qed = {
    .format_name              = "qed",
    .instance_size            = sizeof(BDRVQEDState),
    .create_options           = qed_create_options,

    .bdrv_probe               = bdrv_qed_probe,
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
    .bdrv_create              = bdrv_qed_create,
    .bdrv_flush               = bdrv_qed_flush,
    .bdrv_is_allocated        = bdrv_qed_is_allocated,
    .bdrv_make_empty          = bdrv_qed_make_empty,
    .bdrv_aio_readv           = bdrv_qed_aio_readv,
    .bdrv_aio_writev          = bdrv_qed_aio_writev,
    .bdrv_aio_flush           = bdrv_qed_aio_flush,
    .bdrv_truncate            = bdrv_qed_truncate,
    .bdrv_getlength           = bdrv_qed_getlength,
    .bdrv_get_info            = bdrv_qed_get_info,
    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
    .bdrv_check               = bdrv_qed_check,
};

static void bdrv_qed_init(void)
{
    bdrv_register(&bdrv_qed);
}

block_init(bdrv_qed_init);