Statistics
| Branch: | Revision:

root / block.c @ c488c7f6

History | View | Annotate | Download (90.4 kB)

1
/*
2
 * QEMU System Emulator block driver
3
 *
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "config-host.h"
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "qemu-objects.h"
31
#include "qemu-coroutine.h"
32

    
33
#ifdef CONFIG_BSD
34
#include <sys/types.h>
35
#include <sys/stat.h>
36
#include <sys/ioctl.h>
37
#include <sys/queue.h>
38
#ifndef __DragonFly__
39
#include <sys/disk.h>
40
#endif
41
#endif
42

    
43
#ifdef _WIN32
44
#include <windows.h>
45
#endif
46

    
47
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
48
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
49
        BlockDriverCompletionFunc *cb, void *opaque);
50
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
51
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52
        BlockDriverCompletionFunc *cb, void *opaque);
53
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
54
        BlockDriverCompletionFunc *cb, void *opaque);
55
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
56
        BlockDriverCompletionFunc *cb, void *opaque);
57
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
58
                        uint8_t *buf, int nb_sectors);
59
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
60
                         const uint8_t *buf, int nb_sectors);
61
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
62
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63
        BlockDriverCompletionFunc *cb, void *opaque);
64
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
65
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66
        BlockDriverCompletionFunc *cb, void *opaque);
67
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68
                                         int64_t sector_num, int nb_sectors,
69
                                         QEMUIOVector *iov);
70
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71
                                         int64_t sector_num, int nb_sectors,
72
                                         QEMUIOVector *iov);
73
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
74

    
75
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
77

    
78
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
80

    
81
/* The device to use for VM snapshots */
82
static BlockDriverState *bs_snapshots;
83

    
84
/* If non-zero, use only whitelisted block drivers */
85
static int use_bdrv_whitelist;
86

    
87
#ifdef _WIN32
88
/* Return 1 when filename begins with an ASCII letter followed by ':'
   (for example "c:"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    /* only look at the second character once the first one qualifies */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
94

    
95
/* Return 1 when filename is a bare drive name ("c:") or starts with one
   of the device prefixes "\\.\" or "//./"; return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    /* exactly "<letter>:" with nothing after it */
    if (is_windows_drive_prefix(filename) && !filename[2]) {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
105
#endif
106

    
107
/* Report whether path carries a "<protocol>:" prefix.  The test is simply
   whether path contains a ':' anywhere; on Windows, drive names and
   drive-prefixed paths are excluded first. */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    /* "c:" style names contain a colon but are not protocols */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
119

    
120
/* Return non-zero when path is absolute.  Everything up to and including
   the first ':' (if any) is skipped, and the character that follows must
   be a directory separator. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
139

    
140
/* Write into dest (at most dest_size bytes, always NUL-terminated) the
   location of filename.  An absolute filename is copied unchanged.
   Otherwise the leading part of base_path, up to and including its last
   directory separator or, failing that, its first ':', is prepended to
   filename.  URLs are supported.  Nothing is written when
   dest_size <= 0. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto;
    const char *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "<protocol>:" part, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183

    
184
/* Fill in the I/O entry points a driver leaves unset with the emulation
   helpers of this file, then add the driver to the global driver list. */
void bdrv_register(BlockDriver *bdrv)
{
    if (!bdrv->bdrv_co_readv) {
        /* no coroutine interface: emulate it */
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        } else if (!bdrv->bdrv_read) {
            /* add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    } else {
        /* Emulate AIO by coroutines, and sync by AIO */
        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
        bdrv->bdrv_read = bdrv_read_em;
        bdrv->bdrv_write = bdrv_write_em;
    }

    if (!bdrv->bdrv_aio_flush) {
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
212

    
213
/* create a new block device (by default it is empty) */
214
BlockDriverState *bdrv_new(const char *device_name)
215
{
216
    BlockDriverState *bs;
217

    
218
    bs = g_malloc0(sizeof(BlockDriverState));
219
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
220
    if (device_name[0] != '\0') {
221
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
222
    }
223
    return bs;
224
}
225

    
226
BlockDriver *bdrv_find_format(const char *format_name)
227
{
228
    BlockDriver *drv1;
229
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230
        if (!strcmp(drv1->format_name, format_name)) {
231
            return drv1;
232
        }
233
    }
234
    return NULL;
235
}
236

    
237
/* Return 1 when drv's format name appears in the build-time whitelist
   (CONFIG_BDRV_WHITELIST) or when that whitelist is empty, 0 otherwise. */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    /* no whitelist, anything goes */
    if (whitelist[0] == NULL) {
        return 1;
    }

    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }

    return 0;
}
254

    
255
/* Like bdrv_find_format(), but also returns NULL when the driver found
   is not whitelisted. */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (!drv || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
260

    
261
int bdrv_create(BlockDriver *drv, const char* filename,
262
    QEMUOptionParameter *options)
263
{
264
    if (!drv->bdrv_create)
265
        return -ENOTSUP;
266

    
267
    return drv->bdrv_create(filename, options);
268
}
269

    
270
/* Create filename using the protocol driver selected by
   bdrv_find_protocol().  Returns -ENOENT when no protocol driver is
   found, otherwise the result of bdrv_create(). */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto = bdrv_find_protocol(filename);

    if (!proto) {
        return -ENOENT;
    }

    return bdrv_create(proto, filename, options);
}
281

    
282
#ifdef _WIN32
/* Store the name of a new temporary file in filename.
   NOTE(review): size is not consulted on this path; the buffer is handed
   straight to GetTempFileName() -- confirm callers pass at least MAX_PATH
   bytes. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Store the name of a new temporary file in filename (size bytes).
   The file is placed in $TMPDIR, or in /tmp when TMPDIR is unset, and is
   created by mkstemp(); the descriptor is closed again right away.
   There is no return value, so a mkstemp() failure is not reported to
   the caller. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; never hand that to close() */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
304

    
305
/*
306
 * Detect host devices. By convention, /dev/cdrom[N] is always
307
 * recognized as a host CDROM.
308
 */
309
static BlockDriver *find_hdev_driver(const char *filename)
310
{
311
    int score_max = 0, score;
312
    BlockDriver *drv = NULL, *d;
313

    
314
    QLIST_FOREACH(d, &bdrv_drivers, list) {
315
        if (d->bdrv_probe_device) {
316
            score = d->bdrv_probe_device(filename);
317
            if (score > score_max) {
318
                score_max = score;
319
                drv = d;
320
            }
321
        }
322
    }
323

    
324
    return drv;
325
}
326

    
327
BlockDriver *bdrv_find_protocol(const char *filename)
328
{
329
    BlockDriver *drv1;
330
    char protocol[128];
331
    int len;
332
    const char *p;
333

    
334
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
335

    
336
    /*
337
     * XXX(hch): we really should not let host device detection
338
     * override an explicit protocol specification, but moving this
339
     * later breaks access to device names with colons in them.
340
     * Thanks to the brain-dead persistent naming schemes on udev-
341
     * based Linux systems those actually are quite common.
342
     */
343
    drv1 = find_hdev_driver(filename);
344
    if (drv1) {
345
        return drv1;
346
    }
347

    
348
    if (!path_has_protocol(filename)) {
349
        return bdrv_find_format("file");
350
    }
351
    p = strchr(filename, ':');
352
    assert(p != NULL);
353
    len = p - filename;
354
    if (len > sizeof(protocol) - 1)
355
        len = sizeof(protocol) - 1;
356
    memcpy(protocol, filename, len);
357
    protocol[len] = '\0';
358
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359
        if (drv1->protocol_name &&
360
            !strcmp(drv1->protocol_name, protocol)) {
361
            return drv1;
362
        }
363
    }
364
    return NULL;
365
}
366

    
367
/* Probe filename and store the best matching format driver in *pdrv.
   The file is opened through its protocol and its first bytes are
   offered to every driver's bdrv_probe callback; the highest score above
   zero wins. scsi-generic devices and drives without a medium get the
   "raw" driver without probing.
   Returns a negative errno and sets *pdrv to NULL on failure (-ENOENT
   when no driver is found); on success the return value is the
   non-negative result of the last bdrv_file_open()/bdrv_pread() call. */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    BlockDriverState *bs;
    BlockDriver *candidate;
    BlockDriver *best = NULL;
    uint8_t buf[2048];
    int best_score = 0;
    int ret;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        best = bdrv_find_format("raw");
        *pdrv = best;
        return best ? ret : -ENOENT;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe) {
            continue;
        }
        score = candidate->bdrv_probe(buf, ret, filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    *pdrv = best;
    return best ? ret : -ENOENT;
}
415

    
416
/**
417
 * Set the current 'total_sectors' value
418
 */
419
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420
{
421
    BlockDriver *drv = bs->drv;
422

    
423
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424
    if (bs->sg)
425
        return 0;
426

    
427
    /* query actual device if possible, otherwise just trust the hint */
428
    if (drv->bdrv_getlength) {
429
        int64_t length = drv->bdrv_getlength(bs);
430
        if (length < 0) {
431
            return length;
432
        }
433
        hint = length >> BDRV_SECTOR_BITS;
434
    }
435

    
436
    bs->total_sectors = hint;
437
    return 0;
438
}
439

    
440
/**
441
 * Set open flags for a given cache mode
442
 *
443
 * Return 0 on success, -1 if the cache mode was invalid.
444
 */
445
int bdrv_parse_cache_flags(const char *mode, int *flags)
446
{
447
    *flags &= ~BDRV_O_CACHE_MASK;
448

    
449
    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
450
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
451
    } else if (!strcmp(mode, "directsync")) {
452
        *flags |= BDRV_O_NOCACHE;
453
    } else if (!strcmp(mode, "writeback")) {
454
        *flags |= BDRV_O_CACHE_WB;
455
    } else if (!strcmp(mode, "unsafe")) {
456
        *flags |= BDRV_O_CACHE_WB;
457
        *flags |= BDRV_O_NO_FLUSH;
458
    } else if (!strcmp(mode, "writethrough")) {
459
        /* this is the default */
460
    } else {
461
        return -1;
462
    }
463

    
464
    return 0;
465
}
466

    
467
/*
468
 * Common part for opening disk images and files
469
 */
470
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
471
    int flags, BlockDriver *drv)
472
{
473
    int ret, open_flags;
474

    
475
    assert(drv != NULL);
476

    
477
    bs->file = NULL;
478
    bs->total_sectors = 0;
479
    bs->encrypted = 0;
480
    bs->valid_key = 0;
481
    bs->open_flags = flags;
482
    /* buffer_alignment defaulted to 512, drivers can change this value */
483
    bs->buffer_alignment = 512;
484

    
485
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
486

    
487
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
488
        return -ENOTSUP;
489
    }
490

    
491
    bs->drv = drv;
492
    bs->opaque = g_malloc0(drv->instance_size);
493

    
494
    if (flags & BDRV_O_CACHE_WB)
495
        bs->enable_write_cache = 1;
496

    
497
    /*
498
     * Clear flags that are internal to the block layer before opening the
499
     * image.
500
     */
501
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
502

    
503
    /*
504
     * Snapshots should be writable.
505
     */
506
    if (bs->is_temporary) {
507
        open_flags |= BDRV_O_RDWR;
508
    }
509

    
510
    /* Open the image, either directly or using a protocol */
511
    if (drv->bdrv_file_open) {
512
        ret = drv->bdrv_file_open(bs, filename, open_flags);
513
    } else {
514
        ret = bdrv_file_open(&bs->file, filename, open_flags);
515
        if (ret >= 0) {
516
            ret = drv->bdrv_open(bs, open_flags);
517
        }
518
    }
519

    
520
    if (ret < 0) {
521
        goto free_and_fail;
522
    }
523

    
524
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
525

    
526
    ret = refresh_total_sectors(bs, bs->total_sectors);
527
    if (ret < 0) {
528
        goto free_and_fail;
529
    }
530

    
531
#ifndef _WIN32
532
    if (bs->is_temporary) {
533
        unlink(filename);
534
    }
535
#endif
536
    return 0;
537

    
538
free_and_fail:
539
    if (bs->file) {
540
        bdrv_delete(bs->file);
541
        bs->file = NULL;
542
    }
543
    g_free(bs->opaque);
544
    bs->opaque = NULL;
545
    bs->drv = NULL;
546
    return ret;
547
}
548

    
549
/*
550
 * Opens a file using a protocol (file, host_device, nbd, ...)
551
 */
552
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
553
{
554
    BlockDriverState *bs;
555
    BlockDriver *drv;
556
    int ret;
557

    
558
    drv = bdrv_find_protocol(filename);
559
    if (!drv) {
560
        return -ENOENT;
561
    }
562

    
563
    bs = bdrv_new("");
564
    ret = bdrv_open_common(bs, filename, flags, drv);
565
    if (ret < 0) {
566
        bdrv_delete(bs);
567
        return ret;
568
    }
569
    bs->growable = 1;
570
    *pbs = bs;
571
    return 0;
572
}
573

    
574
/*
575
 * Opens a disk image (raw, qcow2, vmdk, ...)
576
 */
577
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
578
              BlockDriver *drv)
579
{
580
    int ret;
581

    
582
    if (flags & BDRV_O_SNAPSHOT) {
583
        BlockDriverState *bs1;
584
        int64_t total_size;
585
        int is_protocol = 0;
586
        BlockDriver *bdrv_qcow2;
587
        QEMUOptionParameter *options;
588
        char tmp_filename[PATH_MAX];
589
        char backing_filename[PATH_MAX];
590

    
591
        /* if snapshot, we create a temporary backing file and open it
592
           instead of opening 'filename' directly */
593

    
594
        /* if there is a backing file, use it */
595
        bs1 = bdrv_new("");
596
        ret = bdrv_open(bs1, filename, 0, drv);
597
        if (ret < 0) {
598
            bdrv_delete(bs1);
599
            return ret;
600
        }
601
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
602

    
603
        if (bs1->drv && bs1->drv->protocol_name)
604
            is_protocol = 1;
605

    
606
        bdrv_delete(bs1);
607

    
608
        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
609

    
610
        /* Real path is meaningless for protocols */
611
        if (is_protocol)
612
            snprintf(backing_filename, sizeof(backing_filename),
613
                     "%s", filename);
614
        else if (!realpath(filename, backing_filename))
615
            return -errno;
616

    
617
        bdrv_qcow2 = bdrv_find_format("qcow2");
618
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
619

    
620
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
621
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
622
        if (drv) {
623
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
624
                drv->format_name);
625
        }
626

    
627
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
628
        free_option_parameters(options);
629
        if (ret < 0) {
630
            return ret;
631
        }
632

    
633
        filename = tmp_filename;
634
        drv = bdrv_qcow2;
635
        bs->is_temporary = 1;
636
    }
637

    
638
    /* Find the right image format driver */
639
    if (!drv) {
640
        ret = find_image_format(filename, &drv);
641
    }
642

    
643
    if (!drv) {
644
        goto unlink_and_fail;
645
    }
646

    
647
    /* Open the image */
648
    ret = bdrv_open_common(bs, filename, flags, drv);
649
    if (ret < 0) {
650
        goto unlink_and_fail;
651
    }
652

    
653
    /* If there is a backing file, use it */
654
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
655
        char backing_filename[PATH_MAX];
656
        int back_flags;
657
        BlockDriver *back_drv = NULL;
658

    
659
        bs->backing_hd = bdrv_new("");
660

    
661
        if (path_has_protocol(bs->backing_file)) {
662
            pstrcpy(backing_filename, sizeof(backing_filename),
663
                    bs->backing_file);
664
        } else {
665
            path_combine(backing_filename, sizeof(backing_filename),
666
                         filename, bs->backing_file);
667
        }
668

    
669
        if (bs->backing_format[0] != '\0') {
670
            back_drv = bdrv_find_format(bs->backing_format);
671
        }
672

    
673
        /* backing files always opened read-only */
674
        back_flags =
675
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
676

    
677
        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
678
        if (ret < 0) {
679
            bdrv_close(bs);
680
            return ret;
681
        }
682
        if (bs->is_temporary) {
683
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
684
        } else {
685
            /* base image inherits from "parent" */
686
            bs->backing_hd->keep_read_only = bs->keep_read_only;
687
        }
688
    }
689

    
690
    if (!bdrv_key_required(bs)) {
691
        /* call the change callback */
692
        bs->media_changed = 1;
693
        if (bs->change_cb)
694
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
695
    }
696

    
697
    return 0;
698

    
699
unlink_and_fail:
700
    if (bs->is_temporary) {
701
        unlink(filename);
702
    }
703
    return ret;
704
}
705

    
706
/* Close bs if it has a driver: drop its backing image, call the driver's
   bdrv_close callback, free the driver state, close the underlying
   protocol file and signal a media change. Does nothing when no driver
   is attached. */
void bdrv_close(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }

    if (bs->backing_hd) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
    }

    bs->drv->bdrv_close(bs);
    g_free(bs->opaque);
#ifdef _WIN32
    if (bs->is_temporary) {
        unlink(bs->filename);
    }
#endif
    bs->opaque = NULL;
    bs->drv = NULL;

    if (bs->file != NULL) {
        bdrv_close(bs->file);
    }

    /* call the change callback */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}
736

    
737
void bdrv_close_all(void)
738
{
739
    BlockDriverState *bs;
740

    
741
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
742
        bdrv_close(bs);
743
    }
744
}
745

    
746
/* make a BlockDriverState anonymous by removing from bdrv_state list.
747
   Also, NULL terminate the device_name to prevent double remove */
748
void bdrv_make_anon(BlockDriverState *bs)
749
{
750
    if (bs->device_name[0] != '\0') {
751
        QTAILQ_REMOVE(&bdrv_states, bs, list);
752
    }
753
    bs->device_name[0] = '\0';
754
}
755

    
756
/* Close and free bs together with its underlying protocol file.
   bs must not be attached to a device. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->peer == NULL);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);
    bdrv_close(bs);

    if (bs->file) {
        bdrv_delete(bs->file);
    }

    assert(bs_snapshots != bs);
    g_free(bs);
}
771

    
772
/* Record qdev as the device attached to bs. Returns 0 on success or
   -EBUSY when another device is already attached. */
int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (bs->peer != NULL) {
        return -EBUSY;
    }

    bs->peer = qdev;
    return 0;
}
780

    
781
/* Detach qdev from bs and forget the change callback. qdev must be the
   device currently attached. */
void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(qdev == bs->peer);

    bs->change_opaque = NULL;
    bs->change_cb = NULL;
    bs->peer = NULL;
}
788

    
789
/* Return the device attached to bs, or NULL when there is none. */
DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    DeviceState *attached = bs->peer;

    return attached;
}
793

    
794
/*
795
 * Run consistency checks on an image
796
 *
797
 * Returns 0 if the check could be completed (it doesn't mean that the image is
798
 * free of errors) or -errno when an internal error occurred. The results of the
799
 * check are stored in res.
800
 */
801
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
802
{
803
    if (bs->drv->bdrv_check == NULL) {
804
        return -ENOTSUP;
805
    }
806

    
807
    memset(res, 0, sizeof(*res));
808
    return bs->drv->bdrv_check(bs, res);
809
}
810

    
811
#define COMMIT_BUF_SECTORS 2048
812

    
813
/* commit COW file into the raw image */
814
int bdrv_commit(BlockDriverState *bs)
815
{
816
    BlockDriver *drv = bs->drv;
817
    BlockDriver *backing_drv;
818
    int64_t sector, total_sectors;
819
    int n, ro, open_flags;
820
    int ret = 0, rw_ret = 0;
821
    uint8_t *buf;
822
    char filename[1024];
823
    BlockDriverState *bs_rw, *bs_ro;
824

    
825
    if (!drv)
826
        return -ENOMEDIUM;
827
    
828
    if (!bs->backing_hd) {
829
        return -ENOTSUP;
830
    }
831

    
832
    if (bs->backing_hd->keep_read_only) {
833
        return -EACCES;
834
    }
835

    
836
    backing_drv = bs->backing_hd->drv;
837
    ro = bs->backing_hd->read_only;
838
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
839
    open_flags =  bs->backing_hd->open_flags;
840

    
841
    if (ro) {
842
        /* re-open as RW */
843
        bdrv_delete(bs->backing_hd);
844
        bs->backing_hd = NULL;
845
        bs_rw = bdrv_new("");
846
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
847
            backing_drv);
848
        if (rw_ret < 0) {
849
            bdrv_delete(bs_rw);
850
            /* try to re-open read-only */
851
            bs_ro = bdrv_new("");
852
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
853
                backing_drv);
854
            if (ret < 0) {
855
                bdrv_delete(bs_ro);
856
                /* drive not functional anymore */
857
                bs->drv = NULL;
858
                return ret;
859
            }
860
            bs->backing_hd = bs_ro;
861
            return rw_ret;
862
        }
863
        bs->backing_hd = bs_rw;
864
    }
865

    
866
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
867
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
868

    
869
    for (sector = 0; sector < total_sectors; sector += n) {
870
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
871

    
872
            if (bdrv_read(bs, sector, buf, n) != 0) {
873
                ret = -EIO;
874
                goto ro_cleanup;
875
            }
876

    
877
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
878
                ret = -EIO;
879
                goto ro_cleanup;
880
            }
881
        }
882
    }
883

    
884
    if (drv->bdrv_make_empty) {
885
        ret = drv->bdrv_make_empty(bs);
886
        bdrv_flush(bs);
887
    }
888

    
889
    /*
890
     * Make sure all data we wrote to the backing device is actually
891
     * stable on disk.
892
     */
893
    if (bs->backing_hd)
894
        bdrv_flush(bs->backing_hd);
895

    
896
ro_cleanup:
897
    g_free(buf);
898

    
899
    if (ro) {
900
        /* re-open as RO */
901
        bdrv_delete(bs->backing_hd);
902
        bs->backing_hd = NULL;
903
        bs_ro = bdrv_new("");
904
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
905
            backing_drv);
906
        if (ret < 0) {
907
            bdrv_delete(bs_ro);
908
            /* drive not functional anymore */
909
            bs->drv = NULL;
910
            return ret;
911
        }
912
        bs->backing_hd = bs_ro;
913
        bs->backing_hd->keep_read_only = 0;
914
    }
915

    
916
    return ret;
917
}
918

    
919
void bdrv_commit_all(void)
920
{
921
    BlockDriverState *bs;
922

    
923
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
924
        bdrv_commit(bs);
925
    }
926
}
927

    
928
/*
929
 * Return values:
930
 * 0        - success
931
 * -EINVAL  - backing format specified, but no file
932
 * -ENOSPC  - can't update the backing file because no space is left in the
933
 *            image file header
934
 * -ENOTSUP - format driver doesn't support changing the backing file
935
 */
936
int bdrv_change_backing_file(BlockDriverState *bs,
937
    const char *backing_file, const char *backing_fmt)
938
{
939
    BlockDriver *drv = bs->drv;
940

    
941
    if (drv->bdrv_change_backing_file != NULL) {
942
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
943
    } else {
944
        return -ENOTSUP;
945
    }
946
}
947

    
948
/* Validate a byte range against the device: returns -ENOMEDIUM when no
   medium is inserted, 0 for growable devices or when the range lies
   within the current length, and -EIO otherwise. */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    /* growable devices accept any range */
    if (bs->growable) {
        return 0;
    }

    len = bdrv_getlength(bs);

    if (offset < 0 || offset > len || len - offset < size) {
        return -EIO;
    }

    return 0;
}
969

    
970
/* Sector-based wrapper around bdrv_check_byte_request(): both values are
   scaled by BDRV_SECTOR_SIZE before the check. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    int64_t offset = sector_num * BDRV_SECTOR_SIZE;
    size_t size = nb_sectors * BDRV_SECTOR_SIZE;

    return bdrv_check_byte_request(bs, offset, size);
}
976

    
977
/* True unless both the coroutine read and the AIO read entry points of
   drv are the emulation helpers of this file. */
static inline bool bdrv_has_async_rw(BlockDriver *drv)
{
    if (drv->bdrv_co_readv != bdrv_co_readv_em) {
        return true;
    }
    return drv->bdrv_aio_readv != bdrv_aio_readv_em;
}
982

    
983
/* True when drv's AIO flush entry point is not the emulation helper of
   this file. */
static inline bool bdrv_has_async_flush(BlockDriver *drv)
{
    return !(drv->bdrv_aio_flush == bdrv_aio_flush_em);
}
987

    
988
/* return < 0 if error. See bdrv_write() for the return codes */
989
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
990
              uint8_t *buf, int nb_sectors)
991
{
992
    BlockDriver *drv = bs->drv;
993

    
994
    if (!drv)
995
        return -ENOMEDIUM;
996

    
997
    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
998
        QEMUIOVector qiov;
999
        struct iovec iov = {
1000
            .iov_base = (void *)buf,
1001
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1002
        };
1003

    
1004
        qemu_iovec_init_external(&qiov, &iov, 1);
1005
        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1006
    }
1007

    
1008
    if (bdrv_check_request(bs, sector_num, nb_sectors))
1009
        return -EIO;
1010

    
1011
    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
1012
}
1013

    
1014
/*
 * Mark (dirty != 0) or clear (dirty == 0) the dirty-bitmap bits of every
 * chunk touched by the sector range [sector_num, sector_num + nb_sectors).
 *
 * One bit covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors.  bs->dirty_count is
 * adjusted only for bits that actually change state.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    const size_t word_bits = sizeof(unsigned long) * 8;
    int64_t chunk, last_chunk;

    chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    last_chunk = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    while (chunk <= last_chunk) {
        unsigned long word_idx = chunk / word_bits;
        unsigned long bit_pos = chunk % word_bits;
        unsigned long mask = 1UL << bit_pos;
        unsigned long word = bs->dirty_bitmap[word_idx];
        bool is_set = (word & mask) != 0;

        if (dirty && !is_set) {
            bs->dirty_count++;
            word |= mask;
        } else if (!dirty && is_set) {
            bs->dirty_count--;
            word &= ~mask;
        }
        bs->dirty_bitmap[word_idx] = word;

        chunk++;
    }
}
1041

    
1042
/* Return < 0 if error. Important errors are:
1043
  -EIO         generic I/O error (may happen for all errors)
1044
  -ENOMEDIUM   No media inserted.
1045
  -EINVAL      Invalid sector number or nb_sectors
1046
  -EACCES      Trying to write a read-only device
1047
*/
1048
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1049
               const uint8_t *buf, int nb_sectors)
1050
{
1051
    BlockDriver *drv = bs->drv;
1052

    
1053
    if (!bs->drv)
1054
        return -ENOMEDIUM;
1055

    
1056
    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1057
        QEMUIOVector qiov;
1058
        struct iovec iov = {
1059
            .iov_base = (void *)buf,
1060
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1061
        };
1062

    
1063
        qemu_iovec_init_external(&qiov, &iov, 1);
1064
        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1065
    }
1066

    
1067
    if (bs->read_only)
1068
        return -EACCES;
1069
    if (bdrv_check_request(bs, sector_num, nb_sectors))
1070
        return -EIO;
1071

    
1072
    if (bs->dirty_bitmap) {
1073
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1074
    }
1075

    
1076
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1077
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
1078
    }
1079

    
1080
    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1081
}
1082

    
1083
/*
 * Byte-granular read built on top of bdrv_read().
 *
 * Reads count1 bytes starting at byte position offset into buf.  The
 * request is split into up to three parts: an unaligned head that ends on a
 * sector boundary, a run of whole sectors read directly into buf, and an
 * unaligned tail.  Head and tail go through a one-sector bounce buffer.
 *
 * Returns count1 on success or the negative value returned by bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    uint8_t *dst = buf;
    int remaining = count1;
    int chunk, whole_sectors;
    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    int ret;

    /* head: bytes up to the next sector boundary (0 if already aligned) */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, bounce + (offset & (BDRV_SECTOR_SIZE - 1)), chunk);
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector_num++;
        dst += chunk;
    }

    /* middle: whole sectors go straight into the caller's buffer */
    whole_sectors = remaining >> BDRV_SECTOR_BITS;
    if (whole_sectors > 0) {
        ret = bdrv_read(bs, sector_num, dst, whole_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += whole_sectors;
        chunk = whole_sectors << BDRV_SECTOR_BITS;
        dst += chunk;
        remaining -= chunk;
    }

    /* tail: leading part of the final sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, bounce, remaining);
    }
    return count1;
}
1127

    
1128
/*
 * Byte-granular write built on top of bdrv_read()/bdrv_write().
 *
 * Writes count1 bytes from buf at byte position offset.  An unaligned head
 * and an unaligned tail are handled by read-modify-write of a single sector
 * through a bounce buffer; the whole sectors in between are written
 * directly from buf.
 *
 * Returns count1 on success or the negative value returned by the failing
 * bdrv_read()/bdrv_write() call.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    const uint8_t *src = buf;
    int remaining = count1;
    int chunk, whole_sectors;
    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    int ret;

    /* head: bytes up to the next sector boundary (0 if already aligned) */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce + (offset & (BDRV_SECTOR_SIZE - 1)), src, chunk);
        ret = bdrv_write(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector_num++;
        src += chunk;
    }

    /* middle: whole sectors are written straight from the caller's buffer */
    whole_sectors = remaining >> BDRV_SECTOR_BITS;
    if (whole_sectors > 0) {
        ret = bdrv_write(bs, sector_num, src, whole_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += whole_sectors;
        chunk = whole_sectors << BDRV_SECTOR_BITS;
        src += chunk;
        remaining -= chunk;
    }

    /* tail: merge the remaining bytes into the final sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce, src, remaining);
        ret = bdrv_write(bs, sector_num, bounce, 1);
        if (ret < 0) {
            return ret;
        }
    }
    return count1;
}
1176

    
1177
/*
1178
 * Writes to the file and ensures that no writes are reordered across this
1179
 * request (acts as a barrier)
1180
 *
1181
 * Returns 0 on success, -errno in error cases.
1182
 */
1183
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1184
    const void *buf, int count)
1185
{
1186
    int ret;
1187

    
1188
    ret = bdrv_pwrite(bs, offset, buf, count);
1189
    if (ret < 0) {
1190
        return ret;
1191
    }
1192

    
1193
    /* No flush needed for cache modes that use O_DSYNC */
1194
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1195
        bdrv_flush(bs);
1196
    }
1197

    
1198
    return 0;
1199
}
1200

    
1201
/*
 * Vectored read entry point.
 *
 * Emits the bdrv_co_readv trace event, then returns -ENOMEDIUM if the
 * device has no driver, -EIO if the range check fails, and otherwise the
 * result of the driver's bdrv_co_readv hook.
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    if (drv == NULL) {
        return -ENOMEDIUM;
    }

    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}
1217

    
1218
/*
 * Vectored write entry point.
 *
 * Emits the bdrv_co_writev trace event, then returns -ENOMEDIUM if the
 * device has no driver, -EACCES if it is read-only and -EIO if the range
 * check fails.  On the success path the dirty bitmap (if any) and
 * bs->wr_highest_sector are updated before the request is handed to the
 * driver's bdrv_co_writev hook, whose result is returned.
 */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    int64_t last_sector;

    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    if (bs->drv == NULL) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    /* remember the highest sector ever written */
    last_sector = sector_num + nb_sectors - 1;
    if (bs->wr_highest_sector < last_sector) {
        bs->wr_highest_sector = last_sector;
    }

    return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
1245

    
1246
/**
1247
 * Truncate file to 'offset' bytes (needed only for file protocols)
1248
 */
1249
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1250
{
1251
    BlockDriver *drv = bs->drv;
1252
    int ret;
1253
    if (!drv)
1254
        return -ENOMEDIUM;
1255
    if (!drv->bdrv_truncate)
1256
        return -ENOTSUP;
1257
    if (bs->read_only)
1258
        return -EACCES;
1259
    if (bdrv_in_use(bs))
1260
        return -EBUSY;
1261
    ret = drv->bdrv_truncate(bs, offset);
1262
    if (ret == 0) {
1263
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1264
        if (bs->change_cb) {
1265
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1266
        }
1267
    }
1268
    return ret;
1269
}
1270

    
1271
/**
1272
 * Length of a allocated file in bytes. Sparse files are counted by actual
1273
 * allocated space. Return < 0 if error or unknown.
1274
 */
1275
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1276
{
1277
    BlockDriver *drv = bs->drv;
1278
    if (!drv) {
1279
        return -ENOMEDIUM;
1280
    }
1281
    if (drv->bdrv_get_allocated_file_size) {
1282
        return drv->bdrv_get_allocated_file_size(bs);
1283
    }
1284
    if (bs->file) {
1285
        return bdrv_get_allocated_file_size(bs->file);
1286
    }
1287
    return -ENOTSUP;
1288
}
1289

    
1290
/**
1291
 * Length of a file in bytes. Return < 0 if error or unknown.
1292
 */
1293
int64_t bdrv_getlength(BlockDriverState *bs)
1294
{
1295
    BlockDriver *drv = bs->drv;
1296
    if (!drv)
1297
        return -ENOMEDIUM;
1298

    
1299
    if (bs->growable || bs->removable) {
1300
        if (drv->bdrv_getlength) {
1301
            return drv->bdrv_getlength(bs);
1302
        }
1303
    }
1304
    return bs->total_sectors * BDRV_SECTOR_SIZE;
1305
}
1306

    
1307
/* return 0 as number of sectors if no device present or error */
1308
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1309
{
1310
    int64_t length;
1311
    length = bdrv_getlength(bs);
1312
    if (length < 0)
1313
        length = 0;
1314
    else
1315
        length = length >> BDRV_SECTOR_BITS;
1316
    *nb_sectors_ptr = length;
1317
}
1318

    
1319
/*
 * One 16-byte entry of the MSDOS partition table, as overlaid on the raw
 * boot sector by guess_disk_lchs().  The structure is packed so that its
 * layout matches the on-disk bytes exactly.
 */
struct partition {
    uint8_t boot_ind;       /* boot indicator: 0x80 means active */
    uint8_t head;           /* CHS start: head */
    uint8_t sector;         /* CHS start: sector */
    uint8_t cyl;            /* CHS start: cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* CHS end: head */
    uint8_t end_sector;     /* CHS end: sector */
    uint8_t end_cyl;        /* CHS end: cylinder */
    uint32_t start_sect;    /* first sector, counted from 0 */
    uint32_t nr_sects;      /* number of sectors in the partition */
} __attribute__((packed));
1331

    
1332
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1333
static int guess_disk_lchs(BlockDriverState *bs,
1334
                           int *pcylinders, int *pheads, int *psectors)
1335
{
1336
    uint8_t buf[BDRV_SECTOR_SIZE];
1337
    int ret, i, heads, sectors, cylinders;
1338
    struct partition *p;
1339
    uint32_t nr_sects;
1340
    uint64_t nb_sectors;
1341

    
1342
    bdrv_get_geometry(bs, &nb_sectors);
1343

    
1344
    ret = bdrv_read(bs, 0, buf, 1);
1345
    if (ret < 0)
1346
        return -1;
1347
    /* test msdos magic */
1348
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1349
        return -1;
1350
    for(i = 0; i < 4; i++) {
1351
        p = ((struct partition *)(buf + 0x1be)) + i;
1352
        nr_sects = le32_to_cpu(p->nr_sects);
1353
        if (nr_sects && p->end_head) {
1354
            /* We make the assumption that the partition terminates on
1355
               a cylinder boundary */
1356
            heads = p->end_head + 1;
1357
            sectors = p->end_sector & 63;
1358
            if (sectors == 0)
1359
                continue;
1360
            cylinders = nb_sectors / (heads * sectors);
1361
            if (cylinders < 1 || cylinders > 16383)
1362
                continue;
1363
            *pheads = heads;
1364
            *psectors = sectors;
1365
            *pcylinders = cylinders;
1366
#if 0
1367
            printf("guessed geometry: LCHS=%d %d %d\n",
1368
                   cylinders, heads, sectors);
1369
#endif
1370
            return 0;
1371
        }
1372
    }
1373
    return -1;
1374
}
1375

    
1376
/*
 * Determine a CHS geometry for the device and return it through
 * *pcyls, *pheads and *psecs.
 *
 * Order of preference:
 *  1. an existing geometry hint (non-zero cylinder count) is used as is;
 *  2. a geometry guessed from the partition table with at most 16 heads;
 *  3. a default geometry of 16 heads and 63 sectors, with the cylinder
 *     count clamped to 2..16383.
 *
 * In cases 2 and 3 the result is stored as the new geometry hint, and the
 * translation hint may be adjusted when it is currently
 * BIOS_ATA_TRANSLATION_AUTO.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;
    bool use_default;

    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);

    /* if a geometry hint is available, use it */
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        return;
    }

    if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
        /* if heads > 16, it means that a BIOS LBA translation was active,
           so the default hardware geometry is OK */
        if (heads > 16) {
            lba_detected = 1;
        }
        use_default = (lba_detected == 1);
    } else {
        use_default = true;
    }

    if (!use_default) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        /* disable any translation to be in sync with the logical
           geometry */
        if (translation == BIOS_ATA_TRANSLATION_AUTO) {
            bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_NONE);
        }
    } else {
        /* if no geometry, use a standard physical disk geometry */
        cylinders = nb_sectors / (16 * 63);

        if (cylinders > 16383) {
            cylinders = 16383;
        } else if (cylinders < 2) {
            cylinders = 2;
        }
        *pcyls = cylinders;
        *pheads = 16;
        *psecs = 63;

        if (lba_detected == 1 && translation == BIOS_ATA_TRANSLATION_AUTO) {
            int new_translation;

            if ((*pcyls * *pheads) <= 131072) {
                new_translation = BIOS_ATA_TRANSLATION_LARGE;
            } else {
                new_translation = BIOS_ATA_TRANSLATION_LBA;
            }
            bdrv_set_translation_hint(bs, new_translation);
        }
    }

    bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
}
1434

    
1435
/* Record a cylinders/heads/sectors geometry hint in the device state. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->secs = secs;
    bs->heads = heads;
    bs->cyls = cyls;
}
1442

    
1443
/* Record the translation hint in the device state. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1447

    
1448
/*
 * Fetch the stored geometry hint: *pcyls, *pheads and *psecs receive
 * bs->cyls, bs->heads and bs->secs respectively.
 */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *psecs = bs->secs;
    *pheads = bs->heads;
    *pcyls = bs->cyls;
}
1455

    
1456
/* Recognize floppy formats */
1457
typedef struct FDFormat {
1458
    FDriveType drive;
1459
    uint8_t last_sect;
1460
    uint8_t max_track;
1461
    uint8_t max_head;
1462
} FDFormat;
1463

    
1464
static const FDFormat fd_formats[] = {
1465
    /* First entry is default format */
1466
    /* 1.44 MB 3"1/2 floppy disks */
1467
    { FDRIVE_DRV_144, 18, 80, 1, },
1468
    { FDRIVE_DRV_144, 20, 80, 1, },
1469
    { FDRIVE_DRV_144, 21, 80, 1, },
1470
    { FDRIVE_DRV_144, 21, 82, 1, },
1471
    { FDRIVE_DRV_144, 21, 83, 1, },
1472
    { FDRIVE_DRV_144, 22, 80, 1, },
1473
    { FDRIVE_DRV_144, 23, 80, 1, },
1474
    { FDRIVE_DRV_144, 24, 80, 1, },
1475
    /* 2.88 MB 3"1/2 floppy disks */
1476
    { FDRIVE_DRV_288, 36, 80, 1, },
1477
    { FDRIVE_DRV_288, 39, 80, 1, },
1478
    { FDRIVE_DRV_288, 40, 80, 1, },
1479
    { FDRIVE_DRV_288, 44, 80, 1, },
1480
    { FDRIVE_DRV_288, 48, 80, 1, },
1481
    /* 720 kB 3"1/2 floppy disks */
1482
    { FDRIVE_DRV_144,  9, 80, 1, },
1483
    { FDRIVE_DRV_144, 10, 80, 1, },
1484
    { FDRIVE_DRV_144, 10, 82, 1, },
1485
    { FDRIVE_DRV_144, 10, 83, 1, },
1486
    { FDRIVE_DRV_144, 13, 80, 1, },
1487
    { FDRIVE_DRV_144, 14, 80, 1, },
1488
    /* 1.2 MB 5"1/4 floppy disks */
1489
    { FDRIVE_DRV_120, 15, 80, 1, },
1490
    { FDRIVE_DRV_120, 18, 80, 1, },
1491
    { FDRIVE_DRV_120, 18, 82, 1, },
1492
    { FDRIVE_DRV_120, 18, 83, 1, },
1493
    { FDRIVE_DRV_120, 20, 80, 1, },
1494
    /* 720 kB 5"1/4 floppy disks */
1495
    { FDRIVE_DRV_120,  9, 80, 1, },
1496
    { FDRIVE_DRV_120, 11, 80, 1, },
1497
    /* 360 kB 5"1/4 floppy disks */
1498
    { FDRIVE_DRV_120,  9, 40, 1, },
1499
    { FDRIVE_DRV_120,  9, 40, 0, },
1500
    { FDRIVE_DRV_120, 10, 41, 1, },
1501
    { FDRIVE_DRV_120, 10, 42, 1, },
1502
    /* 320 kB 5"1/4 floppy disks */
1503
    { FDRIVE_DRV_120,  8, 40, 1, },
1504
    { FDRIVE_DRV_120,  8, 40, 0, },
1505
    /* 360 kB must match 5"1/4 better than 3"1/2... */
1506
    { FDRIVE_DRV_144,  9, 80, 0, },
1507
    /* end */
1508
    { FDRIVE_DRV_NONE, -1, -1, 0, },
1509
};
1510

    
1511
/*
 * Choose a floppy geometry for the device.
 *
 * If the stored geometry hint yields three non-zero values they are kept as
 * a user-defined disk and *drive is left untouched.  Otherwise fd_formats[]
 * is scanned for entries whose drive type equals drive_in (every entry
 * qualifies when drive_in is FDRIVE_DRV_NONE).  An entry whose size equals
 * the image size wins; failing that, the first qualifying entry is used.
 *
 * NOTE(review): bdrv_get_geometry_hint() fills its outputs in the order
 * cyls, heads, secs, so *nb_heads receives bs->cyls and *max_track receives
 * bs->heads here -- confirm that this argument order is intended.
 * NOTE(review): when no entry qualifies at all, index 1 is used although
 * the table comment calls the first entry the default -- confirm.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *fmt;
    uint64_t nb_sectors, size;
    int idx;
    int fallback = -1;
    bool exact = false;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
        return;
    }

    bdrv_get_geometry(bs, &nb_sectors);

    for (idx = 0; fd_formats[idx].drive != FDRIVE_DRV_NONE; idx++) {
        fmt = &fd_formats[idx];
        if (drive_in != fmt->drive && drive_in != FDRIVE_DRV_NONE) {
            continue;
        }

        size = (fmt->max_head + 1) * fmt->max_track * fmt->last_sect;
        if (nb_sectors == size) {
            exact = true;
            break;
        }
        if (fallback == -1) {
            fallback = idx;
        }
    }

    if (!exact) {
        fmt = &fd_formats[(fallback == -1) ? 1 : fallback];
    }

    *nb_heads = fmt->max_head + 1;
    *max_track = fmt->max_track;
    *last_sect = fmt->last_sect;
    *drive = fmt->drive;
}
1558

    
1559
/* Return the translation hint stored in the device state. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    int translation = bs->translation;

    return translation;
}
1563

    
1564
/* Store the actions to take on read errors and on write errors. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
1570

    
1571
/*
 * Return the stored error action: the read action when is_read is non-zero,
 * the write action otherwise.
 */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
1575

    
1576
/*
 * Set the device's removable flag.  If the device is being marked removable
 * and is currently referenced by bs_snapshots, that reference is cleared.
 */
void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;

    if (!removable) {
        return;
    }
    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }
}
1583

    
1584
/* Return the device's removable flag. */
int bdrv_is_removable(BlockDriverState *bs)
{
    int removable = bs->removable;

    return removable;
}
1588

    
1589
/* Return the device's read_only flag. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    int read_only = bs->read_only;

    return read_only;
}
1593

    
1594
/* Return the device's sg flag. */
int bdrv_is_sg(BlockDriverState *bs)
{
    int sg = bs->sg;

    return sg;
}
1598

    
1599
/* Return the device's enable_write_cache flag. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    int enabled = bs->enable_write_cache;

    return enabled;
}
1603

    
1604
/* XXX: no longer used */
1605
void bdrv_set_change_cb(BlockDriverState *bs,
1606
                        void (*change_cb)(void *opaque, int reason),
1607
                        void *opaque)
1608
{
1609
    bs->change_cb = change_cb;
1610
    bs->change_opaque = opaque;
1611
}
1612

    
1613
/*
 * Return 1 if the device's backing file is encrypted; otherwise return the
 * device's own encrypted flag.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted) {
        return 1;
    }

    return bs->encrypted;
}
1619

    
1620
/*
 * Return 1 if a key still has to be supplied: either the backing file or
 * the device itself is encrypted and does not have a valid key yet.
 * Return 0 otherwise.
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted && !backing->valid_key) {
        return 1;
    }
    if (bs->encrypted && !bs->valid_key) {
        return 1;
    }
    return 0;
}
1628

    
1629
/*
 * Supply the encryption key for a device.
 *
 * An encrypted backing file receives the key first; a failure there is
 * returned immediately, and 0 is returned if only the backing file is
 * encrypted.  For the device itself, -EINVAL is returned when it is not
 * encrypted and -ENOMEDIUM when there is no driver or the driver has no
 * bdrv_set_key hook; otherwise the driver's result is returned.
 *
 * A driver failure clears bs->valid_key.  The first successful call sets
 * valid_key and media_changed and invokes the change callback, if any, with
 * CHANGE_MEDIA.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing = bs->backing_hd;
    int ret;

    if (backing != NULL && backing->encrypted) {
        ret = bdrv_set_key(backing, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (bs->drv == NULL || bs->drv->bdrv_set_key == NULL) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
        }
    }
    return ret;
}
1656

    
1657
/*
 * Copy the driver's format name into buf (at most buf_size bytes, via
 * pstrcpy).  buf is set to the empty string when the device has no driver.
 */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (bs->drv) {
        pstrcpy(buf, buf_size, bs->drv->format_name);
        return;
    }

    buf[0] = '\0';
}
1665

    
1666
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1667
                         void *opaque)
1668
{
1669
    BlockDriver *drv;
1670

    
1671
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1672
        it(opaque, drv->format_name);
1673
    }
1674
}
1675

    
1676
BlockDriverState *bdrv_find(const char *name)
1677
{
1678
    BlockDriverState *bs;
1679

    
1680
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1681
        if (!strcmp(name, bs->device_name)) {
1682
            return bs;
1683
        }
1684
    }
1685
    return NULL;
1686
}
1687

    
1688
/*
 * Step through the bdrv_states list: a NULL argument yields the first
 * device, any other argument yields the device that follows it.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (bs != NULL) {
        return QTAILQ_NEXT(bs, list);
    }

    return QTAILQ_FIRST(&bdrv_states);
}
1695

    
1696
/* Call it(opaque, bs) once for every device on the bdrv_states list. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    for (cur = QTAILQ_FIRST(&bdrv_states); cur; cur = QTAILQ_NEXT(cur, list)) {
        it(opaque, cur);
    }
}
1704

    
1705
/* Return the device's name as stored in bs->device_name. */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    const char *name = bs->device_name;

    return name;
}
1709

    
1710
/*
 * Flush the device.
 *
 * Returns 0 immediately when the device was opened with BDRV_O_NO_FLUSH.
 * When the driver has a non-emulated bdrv_aio_flush hook and the caller
 * runs in a coroutine, bdrv_co_flush_em() is used; otherwise the driver's
 * synchronous bdrv_flush hook is called if it exists.  Returns 0 when there
 * is no driver or no usable hook.
 */
int bdrv_flush(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (drv != NULL) {
        if (bdrv_has_async_flush(drv) && qemu_in_coroutine()) {
            return bdrv_co_flush_em(bs);
        }
        if (drv->bdrv_flush) {
            return drv->bdrv_flush(bs);
        }
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe
     * mode and therefore don't support bdrv_flush. Usually qemu doesn't
     * know how the server works (the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is
     * safe on disk. Returning an error doesn't work because that would
     * break guests even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}
1736

    
1737
void bdrv_flush_all(void)
1738
{
1739
    BlockDriverState *bs;
1740

    
1741
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1742
        if (bs->drv && !bdrv_is_read_only(bs) &&
1743
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1744
            bdrv_flush(bs);
1745
        }
1746
    }
1747
}
1748

    
1749
/*
 * Return the answer of the driver's bdrv_has_zero_init hook, or 1 when the
 * driver does not provide one.  The device must have a driver (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }

    return bs->drv->bdrv_has_zero_init(bs);
}
1759

    
1760
/*
 * Pass a discard request for nb_sectors sectors starting at sector_num to
 * the driver.
 *
 * Returns -ENOMEDIUM without a driver, 0 when the driver has no
 * bdrv_discard hook, and the hook's result otherwise.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_discard) {
        return drv->bdrv_discard(bs, sector_num, nb_sectors);
    }

    return 0;
}
1770

    
1771
/*
1772
 * Returns true iff the specified sector is present in the disk image. Drivers
1773
 * not implementing the functionality are assumed to not support backing files,
1774
 * hence all their sectors are reported as allocated.
1775
 *
1776
 * 'pnum' is set to the number of sectors (including and immediately following
1777
 * the specified sector) that are known to be in the same
1778
 * allocated/unallocated state.
1779
 *
1780
 * 'nb_sectors' is the max value 'pnum' should be set to.
1781
 */
1782
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1783
        int *pnum)
1784
{
1785
    int64_t n;
1786
    if (!bs->drv->bdrv_is_allocated) {
1787
        if (sector_num >= bs->total_sectors) {
1788
            *pnum = 0;
1789
            return 0;
1790
        }
1791
        n = bs->total_sectors - sector_num;
1792
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1793
        return 1;
1794
    }
1795
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1796
}
1797

    
1798
void bdrv_mon_event(const BlockDriverState *bdrv,
1799
                    BlockMonEventAction action, int is_read)
1800
{
1801
    QObject *data;
1802
    const char *action_str;
1803

    
1804
    switch (action) {
1805
    case BDRV_ACTION_REPORT:
1806
        action_str = "report";
1807
        break;
1808
    case BDRV_ACTION_IGNORE:
1809
        action_str = "ignore";
1810
        break;
1811
    case BDRV_ACTION_STOP:
1812
        action_str = "stop";
1813
        break;
1814
    default:
1815
        abort();
1816
    }
1817

    
1818
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1819
                              bdrv->device_name,
1820
                              action_str,
1821
                              is_read ? "read" : "write");
1822
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1823

    
1824
    qobject_decref(data);
1825
}
1826

    
1827
/*
 * qlist_iter() callback: print one device description (a dictionary as
 * built by bdrv_info()) on a single monitor line.  'opaque' is the Monitor
 * to print to.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    Monitor *mon = opaque;
    QDict *bs_dict = qobject_to_qdict(obj);
    int removable = qdict_get_bool(bs_dict, "removable");

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        removable);

    /* the lock state is only shown for removable devices */
    if (removable) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (!qdict_haskey(bs_dict, "inserted")) {
        monitor_printf(mon, " [not inserted]");
    } else {
        QDict *inserted = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(inserted, "file"));
        if (qdict_haskey(inserted, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon,
                                   qdict_get_str(inserted, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(inserted, "ro"),
                            qdict_get_str(inserted, "drv"),
                            qdict_get_bool(inserted, "encrypted"));
    }

    monitor_printf(mon, "\n");
}
1861

    
1862
/*
 * Print a device list: 'data' is converted to a QList and every element is
 * printed to the monitor through bdrv_print_dict().
 */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_print_dict, mon);
}
1866

    
1867
/*
 * Build a list describing every device on the bdrv_states list and return
 * it through *ret_data.
 *
 * Each element is a dictionary with the keys 'device', 'type', 'removable'
 * and 'locked'.  Devices that have a driver additionally get an 'inserted'
 * dictionary with 'file', 'ro', 'drv' and 'encrypted', plus 'backing_file'
 * when a backing file name is set.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;
        QObject *inserted;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, bs->removable,
                                    bs->locked);

        if (bs->drv != NULL) {
            inserted = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                          "'encrypted': %i }",
                                          bs->filename, bs->read_only,
                                          bs->drv->format_name,
                                          bdrv_is_encrypted(bs));

            if (bs->backing_file[0] != '\0') {
                qdict_put(qobject_to_qdict(inserted), "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(qobject_to_qdict(bs_obj), "inserted", inserted);
        }

        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
1904

    
1905
/*
 * qlist_iter() callback: print the statistics of one device on a single
 * monitor line.  'data' is a dictionary holding a 'device' name and a
 * 'stats' sub-dictionary; 'opaque' is the Monitor to print to.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    Monitor *mon = opaque;
    QDict *entry = qobject_to_qdict(data);
    QDict *stats;

    monitor_printf(mon, "%s:", qdict_get_str(entry, "device"));

    stats = qobject_to_qdict(qdict_get(entry, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        " flush_operations=%" PRId64
                        " wr_total_time_ns=%" PRId64
                        " rd_total_time_ns=%" PRId64
                        " flush_total_time_ns=%" PRId64
                        "\n",
                        qdict_get_int(stats, "rd_bytes"),
                        qdict_get_int(stats, "wr_bytes"),
                        qdict_get_int(stats, "rd_operations"),
                        qdict_get_int(stats, "wr_operations"),
                        qdict_get_int(stats, "flush_operations"),
                        qdict_get_int(stats, "wr_total_time_ns"),
                        qdict_get_int(stats, "rd_total_time_ns"),
                        qdict_get_int(stats, "flush_total_time_ns"));
}
1932

    
1933
/*
 * Print a statistics list: 'data' is converted to a QList and every element
 * is printed to the monitor through bdrv_stats_iter().
 */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    QList *entries = qobject_to_qlist(data);

    qlist_iter(entries, bdrv_stats_iter, mon);
}
1937

    
1938
/*
 * Build the statistics object for @bs.
 *
 * The result is a dictionary with a "stats" member holding the byte,
 * operation and time counters of @bs.  A "device" member is added when
 * the device name is non-empty, and a "parent" member (built recursively
 * from bs->file) when bs->file is set.
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *stats_obj;
    QDict *stats_dict;

    stats_obj = qobject_from_jsonf("{ 'stats': {"
                                   "'rd_bytes': %" PRId64 ","
                                   "'wr_bytes': %" PRId64 ","
                                   "'rd_operations': %" PRId64 ","
                                   "'wr_operations': %" PRId64 ","
                                   "'wr_highest_offset': %" PRId64 ","
                                   "'flush_operations': %" PRId64 ","
                                   "'wr_total_time_ns': %" PRId64 ","
                                   "'rd_total_time_ns': %" PRId64 ","
                                   "'flush_total_time_ns': %" PRId64
                                   "} }",
                                   bs->nr_bytes[BDRV_ACCT_READ],
                                   bs->nr_bytes[BDRV_ACCT_WRITE],
                                   bs->nr_ops[BDRV_ACCT_READ],
                                   bs->nr_ops[BDRV_ACCT_WRITE],
                                   /* highest written sector, in bytes */
                                   bs->wr_highest_sector *
                                   (uint64_t)BDRV_SECTOR_SIZE,
                                   bs->nr_ops[BDRV_ACCT_FLUSH],
                                   bs->total_time_ns[BDRV_ACCT_WRITE],
                                   bs->total_time_ns[BDRV_ACCT_READ],
                                   bs->total_time_ns[BDRV_ACCT_FLUSH]);
    stats_dict = qobject_to_qdict(stats_obj);

    if (bs->device_name[0]) {
        qdict_put(stats_dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        qdict_put_obj(stats_dict, "parent", bdrv_info_stats_bs(bs->file));
    }

    return stats_obj;
}
1977

    
1978
/*
 * Collect the statistics objects of all block devices on the bdrv_states
 * list into a new QList and hand it back through @ret_data.
 */
void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QList *stats_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        qlist_append_obj(stats_list, bdrv_info_stats_bs(bs));
    }

    *ret_data = QOBJECT(stats_list);
}
1993

    
1994
/*
 * Return the name of the encrypted image associated with @bs: the backing
 * file name if the backing device is encrypted, otherwise @bs's own file
 * name if @bs is encrypted, otherwise NULL.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }
    return NULL;
}
2003

    
2004
/*
 * Copy the backing file name of @bs into @filename (at most
 * @filename_size bytes, as bounded by pstrcpy()).
 *
 * The former "if (!bs->backing_file)" test compared the address of the
 * name buffer against NULL, which can never be true, so the special case
 * was dead code.  An image without a backing file has an empty name and
 * the plain copy already yields "".
 * NOTE(review): this relies on backing_file being an inline char array in
 * BlockDriverState (it is indexed directly elsewhere in this file) --
 * confirm against block_int.h.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
2013

    
2014
/*
 * Write @nb_sectors compressed sectors from @buf starting at @sector_num.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP if the driver has no
 * bdrv_write_compressed hook, -EIO if bdrv_check_request() rejects the
 * range, otherwise the driver's return value.  The range is marked in the
 * dirty bitmap (when one exists) before the driver is called.
 */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (!driver->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return driver->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
2031

    
2032
/*
 * Fill @bdi with driver-provided information about @bs.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP if the driver has no
 * bdrv_get_info hook; in both cases @bdi is left untouched.  Otherwise
 * @bdi is zeroed and the driver's return value is passed through.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (!driver->bdrv_get_info) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return driver->bdrv_get_info(bs, bdi);
}
2042

    
2043
/*
 * Save @size bytes of VM state from @buf at position @pos.
 *
 * Uses the driver's bdrv_save_vmstate hook when present; otherwise the
 * request is forwarded to bs->file.  Returns -ENOMEDIUM without a driver
 * and -ENOTSUP when neither a hook nor bs->file is available.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (driver->bdrv_save_vmstate) {
        return driver->bdrv_save_vmstate(bs, buf, pos, size);
    }
    return bs->file ? bdrv_save_vmstate(bs->file, buf, pos, size)
                    : -ENOTSUP;
}
2055

    
2056
/*
 * Load @size bytes of VM state from position @pos into @buf.
 *
 * Uses the driver's bdrv_load_vmstate hook when present; otherwise the
 * request is forwarded to bs->file.  Returns -ENOMEDIUM without a driver
 * and -ENOTSUP when neither a hook nor bs->file is available.
 */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (driver->bdrv_load_vmstate) {
        return driver->bdrv_load_vmstate(bs, buf, pos, size);
    }
    return bs->file ? bdrv_load_vmstate(bs->file, buf, pos, size)
                    : -ENOTSUP;
}
2068

    
2069
/*
 * Forward a block debug @event to the driver of @bs.
 *
 * Does nothing when @bs has no driver or the driver has no
 * bdrv_debug_event hook.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    /*
     * Plain call rather than "return <expr>;": ISO C forbids a return
     * statement with an expression in a function returning void.
     */
    drv->bdrv_debug_event(bs, event);
}
2080

    
2081
/**************************************************************/
2082
/* handling of snapshots */
2083

    
2084
/*
 * Return 1 if snapshots can be taken on @bs, 0 otherwise.
 *
 * Devices without a driver, removable devices and read-only devices never
 * qualify.  If the driver has no bdrv_snapshot_create hook, the answer is
 * taken from bs->file (0 when there is none).
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *driver = bs->drv;

    if (!driver || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (driver->bdrv_snapshot_create) {
        return 1;
    }

    if (bs->file != NULL) {
        return bdrv_can_snapshot(bs->file);
    }
    return 0;
}
2100

    
2101
/* Return 1 if @bs was opened with BDRV_O_SNAPSHOT set, 0 otherwise. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) != 0;
}
2105

    
2106
BlockDriverState *bdrv_snapshots(void)
2107
{
2108
    BlockDriverState *bs;
2109

    
2110
    if (bs_snapshots) {
2111
        return bs_snapshots;
2112
    }
2113

    
2114
    bs = NULL;
2115
    while ((bs = bdrv_next(bs))) {
2116
        if (bdrv_can_snapshot(bs)) {
2117
            bs_snapshots = bs;
2118
            return bs;
2119
        }
2120
    }
2121
    return NULL;
2122
}
2123

    
2124
int bdrv_snapshot_create(BlockDriverState *bs,
2125
                         QEMUSnapshotInfo *sn_info)
2126
{
2127
    BlockDriver *drv = bs->drv;
2128
    if (!drv)
2129
        return -ENOMEDIUM;
2130
    if (drv->bdrv_snapshot_create)
2131
        return drv->bdrv_snapshot_create(bs, sn_info);
2132
    if (bs->file)
2133
        return bdrv_snapshot_create(bs->file, sn_info);
2134
    return -ENOTSUP;
2135
}
2136

    
2137
/*
 * Revert @bs to the snapshot identified by @snapshot_id.
 *
 * Uses the driver's bdrv_snapshot_goto hook when present.  Otherwise, if
 * bs->file exists, the driver is closed, the snapshot is applied to
 * bs->file, and the driver is reopened with the original open flags.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP when neither a hook nor
 * bs->file is available, the reopen error if reopening fails, and
 * otherwise the result of the snapshot operation.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }
    if (!bs->file) {
        return -ENOTSUP;
    }

    drv->bdrv_close(bs);
    ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    open_ret = drv->bdrv_open(bs, bs->open_flags);
    if (open_ret < 0) {
        bdrv_delete(bs->file);
        /*
         * Do not leave a pointer to the deleted state behind: some code
         * follows bs->file without checking bs->drv first.
         */
        bs->file = NULL;
        bs->drv = NULL;
        return open_ret;
    }
    return ret;
}
2162

    
2163
/*
 * Delete the snapshot identified by @snapshot_id.
 *
 * Uses the driver's bdrv_snapshot_delete hook when present; otherwise the
 * request is forwarded to bs->file.  Returns -ENOMEDIUM without a driver
 * and -ENOTSUP when neither a hook nor bs->file is available.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (driver->bdrv_snapshot_delete) {
        return driver->bdrv_snapshot_delete(bs, snapshot_id);
    }
    return bs->file ? bdrv_snapshot_delete(bs->file, snapshot_id)
                    : -ENOTSUP;
}
2174

    
2175
int bdrv_snapshot_list(BlockDriverState *bs,
2176
                       QEMUSnapshotInfo **psn_info)
2177
{
2178
    BlockDriver *drv = bs->drv;
2179
    if (!drv)
2180
        return -ENOMEDIUM;
2181
    if (drv->bdrv_snapshot_list)
2182
        return drv->bdrv_snapshot_list(bs, psn_info);
2183
    if (bs->file)
2184
        return bdrv_snapshot_list(bs->file, psn_info);
2185
    return -ENOTSUP;
2186
}
2187

    
2188
/*
 * Ask the driver to load the snapshot named @snapshot_name via its
 * bdrv_snapshot_load_tmp hook.
 *
 * Returns -ENOMEDIUM without a driver, -EINVAL if @bs is not read-only,
 * -ENOTSUP if the driver has no such hook, otherwise the hook's result.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (!driver->bdrv_snapshot_load_tmp) {
        return -ENOTSUP;
    }
    return driver->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
2203

    
2204
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a compact form and
 * return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * place while the scaled value is below 10, a rounded integer otherwise.
 * Anything beyond the T range is still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs every remaining magnitude. */
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2233

    
2234
/*
 * Format one line of the snapshot table into @buf (capacity @buf_size)
 * and return @buf.
 *
 * With @sn == NULL the column header line is produced.  Otherwise the
 * line holds the snapshot's id, name, VM state size (human readable),
 * creation date in local time, and VM clock as hh:mm:ss.mmm.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_buf[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    ti = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&ti);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&ti, &tm);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    /* VM clock is kept in nanoseconds; split into h:m:s and milliseconds */
    secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_buf, sizeof(clock_buf),
             "%02d:%02d:%02d.%03d",
             (int)(secs / 3600),
             (int)((secs / 60) % 60),
             (int)(secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             get_human_readable_size(size_buf, sizeof(size_buf),
                                     sn->vm_state_size),
             date_buf,
             clock_buf);
    return buf;
}
2276

    
2277
/**************************************************************/
2278
/* async I/Os */
2279

    
2280
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2281
                                 QEMUIOVector *qiov, int nb_sectors,
2282
                                 BlockDriverCompletionFunc *cb, void *opaque)
2283
{
2284
    BlockDriver *drv = bs->drv;
2285

    
2286
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2287

    
2288
    if (!drv)
2289
        return NULL;
2290
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2291
        return NULL;
2292

    
2293
    return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2294
                               cb, opaque);
2295
}
2296

    
2297
typedef struct BlockCompleteData {
2298
    BlockDriverCompletionFunc *cb;
2299
    void *opaque;
2300
    BlockDriverState *bs;
2301
    int64_t sector_num;
2302
    int nb_sectors;
2303
} BlockCompleteData;
2304

    
2305
static void block_complete_cb(void *opaque, int ret)
2306
{
2307
    BlockCompleteData *b = opaque;
2308

    
2309
    if (b->bs->dirty_bitmap) {
2310
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2311
    }
2312
    b->cb(b->opaque, ret);
2313
    g_free(b);
2314
}
2315

    
2316
static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2317
                                             int64_t sector_num,
2318
                                             int nb_sectors,
2319
                                             BlockDriverCompletionFunc *cb,
2320
                                             void *opaque)
2321
{
2322
    BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2323

    
2324
    blkdata->bs = bs;
2325
    blkdata->cb = cb;
2326
    blkdata->opaque = opaque;
2327
    blkdata->sector_num = sector_num;
2328
    blkdata->nb_sectors = nb_sectors;
2329

    
2330
    return blkdata;
2331
}
2332

    
2333
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2334
                                  QEMUIOVector *qiov, int nb_sectors,
2335
                                  BlockDriverCompletionFunc *cb, void *opaque)
2336
{
2337
    BlockDriver *drv = bs->drv;
2338
    BlockDriverAIOCB *ret;
2339
    BlockCompleteData *blk_cb_data;
2340

    
2341
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2342

    
2343
    if (!drv)
2344
        return NULL;
2345
    if (bs->read_only)
2346
        return NULL;
2347
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2348
        return NULL;
2349

    
2350
    if (bs->dirty_bitmap) {
2351
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2352
                                         opaque);
2353
        cb = &block_complete_cb;
2354
        opaque = blk_cb_data;
2355
    }
2356

    
2357
    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2358
                               cb, opaque);
2359

    
2360
    if (ret) {
2361
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2362
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2363
        }
2364
    }
2365

    
2366
    return ret;
2367
}
2368

    
2369

    
2370
typedef struct MultiwriteCB {
2371
    int error;
2372
    int num_requests;
2373
    int num_callbacks;
2374
    struct {
2375
        BlockDriverCompletionFunc *cb;
2376
        void *opaque;
2377
        QEMUIOVector *free_qiov;
2378
        void *free_buf;
2379
    } callbacks[];
2380
} MultiwriteCB;
2381

    
2382
static void multiwrite_user_cb(MultiwriteCB *mcb)
2383
{
2384
    int i;
2385

    
2386
    for (i = 0; i < mcb->num_callbacks; i++) {
2387
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2388
        if (mcb->callbacks[i].free_qiov) {
2389
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2390
        }
2391
        g_free(mcb->callbacks[i].free_qiov);
2392
        qemu_vfree(mcb->callbacks[i].free_buf);
2393
    }
2394
}
2395

    
2396
static void multiwrite_cb(void *opaque, int ret)
2397
{
2398
    MultiwriteCB *mcb = opaque;
2399

    
2400
    trace_multiwrite_cb(mcb, ret);
2401

    
2402
    if (ret < 0 && !mcb->error) {
2403
        mcb->error = ret;
2404
    }
2405

    
2406
    mcb->num_requests--;
2407
    if (mcb->num_requests == 0) {
2408
        multiwrite_user_cb(mcb);
2409
        g_free(mcb);
2410
    }
2411
}
2412

    
2413
static int multiwrite_req_compare(const void *a, const void *b)
2414
{
2415
    const BlockRequest *req1 = a, *req2 = b;
2416

    
2417
    /*
2418
     * Note that we can't simply subtract req2->sector from req1->sector
2419
     * here as that could overflow the return value.
2420
     */
2421
    if (req1->sector > req2->sector) {
2422
        return 1;
2423
    } else if (req1->sector < req2->sector) {
2424
        return -1;
2425
    } else {
2426
        return 0;
2427
    }
2428
}
2429

    
2430
/*
2431
 * Takes a bunch of requests and tries to merge them. Returns the number of
2432
 * requests that remain after merging.
2433
 */
2434
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2435
    int num_reqs, MultiwriteCB *mcb)
2436
{
2437
    int i, outidx;
2438

    
2439
    // Sort requests by start sector
2440
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2441

    
2442
    // Check if adjacent requests touch the same clusters. If so, combine them,
2443
    // filling up gaps with zero sectors.
2444
    outidx = 0;
2445
    for (i = 1; i < num_reqs; i++) {
2446
        int merge = 0;
2447
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2448

    
2449
        // This handles the cases that are valid for all block drivers, namely
2450
        // exactly sequential writes and overlapping writes.
2451
        if (reqs[i].sector <= oldreq_last) {
2452
            merge = 1;
2453
        }
2454

    
2455
        // The block driver may decide that it makes sense to combine requests
2456
        // even if there is a gap of some sectors between them. In this case,
2457
        // the gap is filled with zeros (therefore only applicable for yet
2458
        // unused space in format like qcow2).
2459
        if (!merge && bs->drv->bdrv_merge_requests) {
2460
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2461
        }
2462

    
2463
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2464
            merge = 0;
2465
        }
2466

    
2467
        if (merge) {
2468
            size_t size;
2469
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2470
            qemu_iovec_init(qiov,
2471
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2472

    
2473
            // Add the first request to the merged one. If the requests are
2474
            // overlapping, drop the last sectors of the first request.
2475
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2476
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2477

    
2478
            // We might need to add some zeros between the two requests
2479
            if (reqs[i].sector > oldreq_last) {
2480
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2481
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2482
                memset(buf, 0, zero_bytes);
2483
                qemu_iovec_add(qiov, buf, zero_bytes);
2484
                mcb->callbacks[i].free_buf = buf;
2485
            }
2486

    
2487
            // Add the second request
2488
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2489

    
2490
            reqs[outidx].nb_sectors = qiov->size >> 9;
2491
            reqs[outidx].qiov = qiov;
2492

    
2493
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2494
        } else {
2495
            outidx++;
2496
            reqs[outidx].sector     = reqs[i].sector;
2497
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2498
            reqs[outidx].qiov       = reqs[i].qiov;
2499
        }
2500
    }
2501

    
2502
    return outidx + 1;
2503
}
2504

    
2505
/*
2506
 * Submit multiple AIO write requests at once.
2507
 *
2508
 * On success, the function returns 0 and all requests in the reqs array have
2509
 * been submitted. In error case this function returns -1, and any of the
2510
 * requests may or may not be submitted yet. In particular, this means that the
2511
 * callback will be called for some of the requests, for others it won't. The
2512
 * caller must check the error field of the BlockRequest to wait for the right
2513
 * callbacks (if error != 0, no callback will be called).
2514
 *
2515
 * The implementation may modify the contents of the reqs array, e.g. to merge
2516
 * requests. However, the fields opaque and error are left unmodified as they
2517
 * are used to signal failure for a single request to the caller.
2518
 */
2519
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2520
{
2521
    BlockDriverAIOCB *acb;
2522
    MultiwriteCB *mcb;
2523
    int i;
2524

    
2525
    /* don't submit writes if we don't have a medium */
2526
    if (bs->drv == NULL) {
2527
        for (i = 0; i < num_reqs; i++) {
2528
            reqs[i].error = -ENOMEDIUM;
2529
        }
2530
        return -1;
2531
    }
2532

    
2533
    if (num_reqs == 0) {
2534
        return 0;
2535
    }
2536

    
2537
    // Create MultiwriteCB structure
2538
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2539
    mcb->num_requests = 0;
2540
    mcb->num_callbacks = num_reqs;
2541

    
2542
    for (i = 0; i < num_reqs; i++) {
2543
        mcb->callbacks[i].cb = reqs[i].cb;
2544
        mcb->callbacks[i].opaque = reqs[i].opaque;
2545
    }
2546

    
2547
    // Check for mergable requests
2548
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2549

    
2550
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2551

    
2552
    /*
2553
     * Run the aio requests. As soon as one request can't be submitted
2554
     * successfully, fail all requests that are not yet submitted (we must
2555
     * return failure for all requests anyway)
2556
     *
2557
     * num_requests cannot be set to the right value immediately: If
2558
     * bdrv_aio_writev fails for some request, num_requests would be too high
2559
     * and therefore multiwrite_cb() would never recognize the multiwrite
2560
     * request as completed. We also cannot use the loop variable i to set it
2561
     * when the first request fails because the callback may already have been
2562
     * called for previously submitted requests. Thus, num_requests must be
2563
     * incremented for each request that is submitted.
2564
     *
2565
     * The problem that callbacks may be called early also means that we need
2566
     * to take care that num_requests doesn't become 0 before all requests are
2567
     * submitted - multiwrite_cb() would consider the multiwrite request
2568
     * completed. A dummy request that is "completed" by a manual call to
2569
     * multiwrite_cb() takes care of this.
2570
     */
2571
    mcb->num_requests = 1;
2572

    
2573
    // Run the aio requests
2574
    for (i = 0; i < num_reqs; i++) {
2575
        mcb->num_requests++;
2576
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2577
            reqs[i].nb_sectors, multiwrite_cb, mcb);
2578

    
2579
        if (acb == NULL) {
2580
            // We can only fail the whole thing if no request has been
2581
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2582
            // complete and report the error in the callback.
2583
            if (i == 0) {
2584
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2585
                goto fail;
2586
            } else {
2587
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2588
                multiwrite_cb(mcb, -EIO);
2589
                break;
2590
            }
2591
        }
2592
    }
2593

    
2594
    /* Complete the dummy request */
2595
    multiwrite_cb(mcb, 0);
2596

    
2597
    return 0;
2598

    
2599
fail:
2600
    for (i = 0; i < mcb->num_callbacks; i++) {
2601
        reqs[i].error = -EIO;
2602
    }
2603
    g_free(mcb);
2604
    return -1;
2605
}
2606

    
2607
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2608
        BlockDriverCompletionFunc *cb, void *opaque)
2609
{
2610
    BlockDriver *drv = bs->drv;
2611

    
2612
    trace_bdrv_aio_flush(bs, opaque);
2613

    
2614
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2615
        return bdrv_aio_noop_em(bs, cb, opaque);
2616
    }
2617

    
2618
    if (!drv)
2619
        return NULL;
2620
    return drv->bdrv_aio_flush(bs, cb, opaque);
2621
}
2622

    
2623
/* Cancel @acb through the cancel hook of the pool it belongs to. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    AIOPool *pool = acb->pool;

    pool->cancel(acb);
}
2627

    
2628

    
2629
/**************************************************************/
2630
/* async block device emulation */
2631

    
2632
typedef struct BlockDriverAIOCBSync {
2633
    BlockDriverAIOCB common;
2634
    QEMUBH *bh;
2635
    int ret;
2636
    /* vector translation state */
2637
    QEMUIOVector *qiov;
2638
    uint8_t *bounce;
2639
    int is_write;
2640
} BlockDriverAIOCBSync;
2641

    
2642
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2643
{
2644
    BlockDriverAIOCBSync *acb =
2645
        container_of(blockacb, BlockDriverAIOCBSync, common);
2646
    qemu_bh_delete(acb->bh);
2647
    acb->bh = NULL;
2648
    qemu_aio_release(acb);
2649
}
2650

    
2651
static AIOPool bdrv_em_aio_pool = {
2652
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2653
    .cancel             = bdrv_aio_cancel_em,
2654
};
2655

    
2656
static void bdrv_aio_bh_cb(void *opaque)
2657
{
2658
    BlockDriverAIOCBSync *acb = opaque;
2659

    
2660
    if (!acb->is_write)
2661
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2662
    qemu_vfree(acb->bounce);
2663
    acb->common.cb(acb->common.opaque, acb->ret);
2664
    qemu_bh_delete(acb->bh);
2665
    acb->bh = NULL;
2666
    qemu_aio_release(acb);
2667
}
2668

    
2669
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2670
                                            int64_t sector_num,
2671
                                            QEMUIOVector *qiov,
2672
                                            int nb_sectors,
2673
                                            BlockDriverCompletionFunc *cb,
2674
                                            void *opaque,
2675
                                            int is_write)
2676

    
2677
{
2678
    BlockDriverAIOCBSync *acb;
2679

    
2680
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2681
    acb->is_write = is_write;
2682
    acb->qiov = qiov;
2683
    acb->bounce = qemu_blockalign(bs, qiov->size);
2684

    
2685
    if (!acb->bh)
2686
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2687

    
2688
    if (is_write) {
2689
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2690
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2691
    } else {
2692
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2693
    }
2694

    
2695
    qemu_bh_schedule(acb->bh);
2696

    
2697
    return &acb->common;
2698
}
2699

    
2700
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2701
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2702
        BlockDriverCompletionFunc *cb, void *opaque)
2703
{
2704
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2705
}
2706

    
2707
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2708
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2709
        BlockDriverCompletionFunc *cb, void *opaque)
2710
{
2711
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2712
}
2713

    
2714

    
2715
typedef struct BlockDriverAIOCBCoroutine {
2716
    BlockDriverAIOCB common;
2717
    BlockRequest req;
2718
    bool is_write;
2719
    QEMUBH* bh;
2720
} BlockDriverAIOCBCoroutine;
2721

    
2722
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2723
{
2724
    qemu_aio_flush();
2725
}
2726

    
2727
static AIOPool bdrv_em_co_aio_pool = {
2728
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
2729
    .cancel             = bdrv_aio_co_cancel_em,
2730
};
2731

    
2732
static void bdrv_co_rw_bh(void *opaque)
2733
{
2734
    BlockDriverAIOCBCoroutine *acb = opaque;
2735

    
2736
    acb->common.cb(acb->common.opaque, acb->req.error);
2737
    qemu_bh_delete(acb->bh);
2738
    qemu_aio_release(acb);
2739
}
2740

    
2741
/*
 * Coroutine entry point: run the request stored in the AIOCB through the
 * driver's bdrv_co_writev or bdrv_co_readv hook, store the result in
 * req.error, and schedule bdrv_co_rw_bh() to complete the request.
 */
static void coroutine_fn bdrv_co_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *co_acb = opaque;
    BlockDriverState *bs = co_acb->common.bs;
    BlockRequest *req = &co_acb->req;

    if (co_acb->is_write) {
        req->error = bs->drv->bdrv_co_writev(bs, req->sector,
            req->nb_sectors, req->qiov);
    } else {
        req->error = bs->drv->bdrv_co_readv(bs, req->sector,
            req->nb_sectors, req->qiov);
    }

    co_acb->bh = qemu_bh_new(bdrv_co_rw_bh, co_acb);
    qemu_bh_schedule(co_acb->bh);
}
2757

    
2758
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2759
                                               int64_t sector_num,
2760
                                               QEMUIOVector *qiov,
2761
                                               int nb_sectors,
2762
                                               BlockDriverCompletionFunc *cb,
2763
                                               void *opaque,
2764
                                               bool is_write)
2765
{
2766
    Coroutine *co;
2767
    BlockDriverAIOCBCoroutine *acb;
2768

    
2769
    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2770
    acb->req.sector = sector_num;
2771
    acb->req.nb_sectors = nb_sectors;
2772
    acb->req.qiov = qiov;
2773
    acb->is_write = is_write;
2774

    
2775
    co = qemu_coroutine_create(bdrv_co_rw);
2776
    qemu_coroutine_enter(co, acb);
2777

    
2778
    return &acb->common;
2779
}
2780

    
2781
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2782
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2783
        BlockDriverCompletionFunc *cb, void *opaque)
2784
{
2785
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2786
                                 false);
2787
}
2788

    
2789
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2790
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2791
        BlockDriverCompletionFunc *cb, void *opaque)
2792
{
2793
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2794
                                 true);
2795
}
2796

    
2797
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2798
        BlockDriverCompletionFunc *cb, void *opaque)
2799
{
2800
    BlockDriverAIOCBSync *acb;
2801

    
2802
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2803
    acb->is_write = 1; /* don't bounce in the completion hadler */
2804
    acb->qiov = NULL;
2805
    acb->bounce = NULL;
2806
    acb->ret = 0;
2807

    
2808
    if (!acb->bh)
2809
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2810

    
2811
    bdrv_flush(bs);
2812
    qemu_bh_schedule(acb->bh);
2813
    return &acb->common;
2814
}
2815

    
2816
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2817
        BlockDriverCompletionFunc *cb, void *opaque)
2818
{
2819
    BlockDriverAIOCBSync *acb;
2820

    
2821
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2822
    acb->is_write = 1; /* don't bounce in the completion handler */
2823
    acb->qiov = NULL;
2824
    acb->bounce = NULL;
2825
    acb->ret = 0;
2826

    
2827
    if (!acb->bh) {
2828
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2829
    }
2830

    
2831
    qemu_bh_schedule(acb->bh);
2832
    return &acb->common;
2833
}
2834

    
2835
/**************************************************************/
2836
/* sync block device emulation */
2837

    
2838
/*
 * Completion function for the synchronous emulation: store @ret in the
 * int that @opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}

/* Marker stored in the result variable while a request is in flight */
#define NOT_DONE 0x7fffffff
2844

    
2845
/*
 * Synchronous read emulated on top of bdrv_aio_readv().
 *
 * Wraps @buf in a single-element I/O vector, submits the request and then
 * spins in qemu_aio_wait() until the completion callback has replaced the
 * NOT_DONE sentinel with the request's result.
 *
 * Returns the value delivered to the completion callback, or -EIO if the
 * request could not be submitted (matching what bdrv_co_io_em() reports for
 * the same condition, instead of a bare -1).
 */
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
                         bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        return -EIO;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

    return async_ret;
}
2872

    
2873
/*
 * Synchronous write emulated on top of bdrv_aio_writev().
 *
 * Wraps @buf in a single-element I/O vector, submits the request and then
 * spins in qemu_aio_wait() until the completion callback has replaced the
 * NOT_DONE sentinel with the request's result.
 *
 * Returns the value delivered to the completion callback, or -EIO if the
 * request could not be submitted (matching what bdrv_co_io_em() reports for
 * the same condition, instead of a bare -1).
 */
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    /* struct iovec has no const variant; the buffer is only read from. */
    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
                          bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        return -EIO;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

    return async_ret;
}
2898

    
2899
void bdrv_init(void)
2900
{
2901
    module_call_init(MODULE_INIT_BLOCK);
2902
}
2903

    
2904
void bdrv_init_with_whitelist(void)
2905
{
2906
    use_bdrv_whitelist = 1;
2907
    bdrv_init();
2908
}
2909

    
2910
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2911
                   BlockDriverCompletionFunc *cb, void *opaque)
2912
{
2913
    BlockDriverAIOCB *acb;
2914

    
2915
    if (pool->free_aiocb) {
2916
        acb = pool->free_aiocb;
2917
        pool->free_aiocb = acb->next;
2918
    } else {
2919
        acb = g_malloc0(pool->aiocb_size);
2920
        acb->pool = pool;
2921
    }
2922
    acb->bs = bs;
2923
    acb->cb = cb;
2924
    acb->opaque = opaque;
2925
    return acb;
2926
}
2927

    
2928
void qemu_aio_release(void *p)
2929
{
2930
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2931
    AIOPool *pool = acb->pool;
2932
    acb->next = pool->free_aiocb;
2933
    pool->free_aiocb = acb;
2934
}
2935

    
2936
/**************************************************************/
2937
/* Coroutine block device emulation */
2938

    
2939
typedef struct CoroutineIOCompletion {
2940
    Coroutine *coroutine;
2941
    int ret;
2942
} CoroutineIOCompletion;
2943

    
2944
static void bdrv_co_io_em_complete(void *opaque, int ret)
2945
{
2946
    CoroutineIOCompletion *co = opaque;
2947

    
2948
    co->ret = ret;
2949
    qemu_coroutine_enter(co->coroutine, NULL);
2950
}
2951

    
2952
/*
 * Coroutine read/write emulated on top of the AIO interface.
 *
 * Submits an AIO read or write (selected by @is_write) whose completion
 * callback re-enters the current coroutine, then yields until that happens.
 *
 * Returns the result delivered to the completion callback, or -EIO if the
 * request could not be submitted.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion completion = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb = is_write
        ? bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                          bdrv_co_io_em_complete, &completion)
        : bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                         bdrv_co_io_em_complete, &completion);

    trace_bdrv_co_io(is_write, acb);
    if (acb == NULL) {
        return -EIO;
    }

    /* Resumed by bdrv_co_io_em_complete() once the request has finished. */
    qemu_coroutine_yield();

    return completion.ret;
}
2977

    
2978
/* Coroutine vectored read: bdrv_co_io_em() with is_write set to false. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = false;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2984

    
2985
/* Coroutine vectored write: bdrv_co_io_em() with is_write set to true. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = true;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2991

    
2992
/*
 * Coroutine flush emulated on top of bdrv_aio_flush().
 *
 * Submits the flush with a completion callback that re-enters the current
 * coroutine, then yields.  Returns the result delivered to that callback,
 * or -EIO if the request could not be submitted.
 */
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
{
    CoroutineIOCompletion completion = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *request;

    request = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &completion);
    if (request == NULL) {
        return -EIO;
    }

    qemu_coroutine_yield();

    return completion.ret;
}
3006

    
3007
/**************************************************************/
3008
/* removable device support */
3009

    
3010
/**
3011
 * Return TRUE if the media is present
3012
 */
3013
int bdrv_is_inserted(BlockDriverState *bs)
3014
{
3015
    BlockDriver *drv = bs->drv;
3016
    int ret;
3017
    if (!drv)
3018
        return 0;
3019
    if (!drv->bdrv_is_inserted)
3020
        return !bs->tray_open;
3021
    ret = drv->bdrv_is_inserted(bs);
3022
    return ret;
3023
}
3024

    
3025
/**
3026
 * Return TRUE if the media changed since the last call to this
3027
 * function. It is currently only used for floppy disks
3028
 */
3029
int bdrv_media_changed(BlockDriverState *bs)
3030
{
3031
    BlockDriver *drv = bs->drv;
3032
    int ret;
3033

    
3034
    if (!drv || !drv->bdrv_media_changed)
3035
        ret = -ENOTSUP;
3036
    else
3037
        ret = drv->bdrv_media_changed(bs);
3038
    if (ret == -ENOTSUP)
3039
        ret = bs->media_changed;
3040
    bs->media_changed = 0;
3041
    return ret;
3042
}
3043

    
3044
/**
3045
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3046
 */
3047
int bdrv_eject(BlockDriverState *bs, int eject_flag)
3048
{
3049
    BlockDriver *drv = bs->drv;
3050

    
3051
    if (eject_flag && bs->locked) {
3052
        return -EBUSY;
3053
    }
3054

    
3055
    if (drv && drv->bdrv_eject) {
3056
        drv->bdrv_eject(bs, eject_flag);
3057
    }
3058
    bs->tray_open = eject_flag;
3059
    return 0;
3060
}
3061

    
3062
/* Return the current value of the device's locked flag. */
int bdrv_is_locked(BlockDriverState *bs)
{
    int locked = bs->locked;

    return locked;
}
3066

    
3067
/**
3068
 * Lock or unlock the media (if it is locked, the user won't be able
3069
 * to eject it manually).
3070
 */
3071
void bdrv_set_locked(BlockDriverState *bs, int locked)
3072
{
3073
    BlockDriver *drv = bs->drv;
3074

    
3075
    trace_bdrv_set_locked(bs, locked);
3076

    
3077
    bs->locked = locked;
3078
    if (drv && drv->bdrv_set_locked) {
3079
        drv->bdrv_set_locked(bs, locked);
3080
    }
3081
}
3082

    
3083
/* needed for generic scsi interface */
3084

    
3085
/*
 * Forward an ioctl request to the driver's bdrv_ioctl callback.
 * Returns the callback's result, or -ENOTSUP if there is no driver or the
 * driver does not provide the callback.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL || drv->bdrv_ioctl == NULL) {
        return -ENOTSUP;
    }
    return drv->bdrv_ioctl(bs, req, buf);
}
3093

    
3094
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3095
        unsigned long int req, void *buf,
3096
        BlockDriverCompletionFunc *cb, void *opaque)
3097
{
3098
    BlockDriver *drv = bs->drv;
3099

    
3100
    if (drv && drv->bdrv_aio_ioctl)
3101
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3102
    return NULL;
3103
}
3104

    
3105

    
3106

    
3107
/*
 * Allocate @size bytes through qemu_memalign(), aligned to the device's
 * buffer_alignment.  An alignment of 512 is used when @bs is NULL or its
 * buffer_alignment is zero.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    size_t alignment = 512;

    if (bs && bs->buffer_alignment) {
        alignment = bs->buffer_alignment;
    }
    return qemu_memalign(alignment, size);
}
3111

    
3112
/*
 * Enable or disable dirty-chunk tracking for @bs.
 *
 * The dirty counter is reset in both cases.  Enabling allocates a zeroed
 * bitmap with one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors unless one
 * already exists; disabling frees the bitmap.
 *
 * bdrv_get_dirty() indexes the bitmap in units of unsigned long, so the
 * allocation is rounded up to a whole number of unsigned long words.
 * Sizing it in bytes, as was done before, could leave the last word only
 * partially allocated and let readers run past the end of the buffer.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    bs->dirty_count = 0;

    if (!enable) {
        /* g_free() accepts NULL, so no guard is needed. */
        g_free(bs->dirty_bitmap);
        bs->dirty_bitmap = NULL;
        return;
    }

    if (!bs->dirty_bitmap) {
        /* Number of sectors described by one unsigned long of the bitmap. */
        const int64_t sectors_per_word =
            (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK *
            (int64_t)(sizeof(unsigned long) * 8);
        int64_t nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        int64_t nb_words = (nb_sectors + sectors_per_word - 1) /
                           sectors_per_word;

        bs->dirty_bitmap = g_malloc0(nb_words * sizeof(unsigned long));
    }
}
3132

    
3133
/*
 * Return 1 if the dirty chunk containing @sector is marked in the bitmap,
 * 0 otherwise.  Also returns 0 when dirty tracking is off (no bitmap) or
 * when @sector lies at or beyond the length reported by bdrv_getlength().
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (!bs->dirty_bitmap ||
        (sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    return !!(bs->dirty_bitmap[chunk / bits_per_word] &
              (1UL << (chunk % bits_per_word)));
}
3145

    
3146
/*
 * Clear the dirty state for @nr_sectors sectors starting at @cur_sector by
 * calling set_dirty_bitmap() with a dirty value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    const int dirty = 0;

    set_dirty_bitmap(bs, cur_sector, nr_sectors, dirty);
}
3151

    
3152
/* Return the device's current dirty counter (bs->dirty_count). */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    int64_t count = bs->dirty_count;

    return count;
}
3156

    
3157
/*
 * Set the device's in_use flag.  The new value must differ from the current
 * one; setting the flag to the value it already has trips the assertion.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);

    bs->in_use = in_use;
}
3162

    
3163
/* Return the device's in_use flag. */
int bdrv_in_use(BlockDriverState *bs)
{
    int in_use = bs->in_use;

    return in_use;
}
3167

    
3168
void
3169
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3170
        enum BlockAcctType type)
3171
{
3172
    assert(type < BDRV_MAX_IOTYPE);
3173

    
3174
    cookie->bytes = bytes;
3175
    cookie->start_time_ns = get_clock();
3176
    cookie->type = type;
3177
}
3178

    
3179
void
3180
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3181
{
3182
    assert(cookie->type < BDRV_MAX_IOTYPE);
3183

    
3184
    bs->nr_bytes[cookie->type] += cookie->bytes;
3185
    bs->nr_ops[cookie->type]++;
3186
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3187
}
3188

    
3189
int bdrv_img_create(const char *filename, const char *fmt,
3190
                    const char *base_filename, const char *base_fmt,
3191
                    char *options, uint64_t img_size, int flags)
3192
{
3193
    QEMUOptionParameter *param = NULL, *create_options = NULL;
3194
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
3195
    BlockDriverState *bs = NULL;
3196
    BlockDriver *drv, *proto_drv;
3197
    BlockDriver *backing_drv = NULL;
3198
    int ret = 0;
3199

    
3200
    /* Find driver and parse its options */
3201
    drv = bdrv_find_format(fmt);
3202
    if (!drv) {
3203
        error_report("Unknown file format '%s'", fmt);
3204
        ret = -EINVAL;
3205
        goto out;
3206
    }
3207

    
3208
    proto_drv = bdrv_find_protocol(filename);
3209
    if (!proto_drv) {
3210
        error_report("Unknown protocol '%s'", filename);
3211
        ret = -EINVAL;
3212
        goto out;
3213
    }
3214

    
3215
    create_options = append_option_parameters(create_options,
3216
                                              drv->create_options);
3217
    create_options = append_option_parameters(create_options,
3218
                                              proto_drv->create_options);
3219

    
3220
    /* Create parameter list with default values */
3221
    param = parse_option_parameters("", create_options, param);
3222

    
3223
    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3224

    
3225
    /* Parse -o options */
3226
    if (options) {
3227
        param = parse_option_parameters(options, create_options, param);
3228
        if (param == NULL) {
3229
            error_report("Invalid options for file format '%s'.", fmt);
3230
            ret = -EINVAL;
3231
            goto out;
3232
        }
3233
    }
3234

    
3235
    if (base_filename) {
3236
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3237
                                 base_filename)) {
3238
            error_report("Backing file not supported for file format '%s'",
3239
                         fmt);
3240
            ret = -EINVAL;
3241
            goto out;
3242
        }
3243
    }
3244

    
3245
    if (base_fmt) {
3246
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3247
            error_report("Backing file format not supported for file "
3248
                         "format '%s'", fmt);
3249
            ret = -EINVAL;
3250
            goto out;
3251
        }
3252
    }
3253

    
3254
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3255
    if (backing_file && backing_file->value.s) {
3256
        if (!strcmp(filename, backing_file->value.s)) {
3257
            error_report("Error: Trying to create an image with the "
3258
                         "same filename as the backing file");
3259
            ret = -EINVAL;
3260
            goto out;
3261
        }
3262
    }
3263

    
3264
    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3265
    if (backing_fmt && backing_fmt->value.s) {
3266
        backing_drv = bdrv_find_format(backing_fmt->value.s);
3267
        if (!backing_drv) {
3268
            error_report("Unknown backing file format '%s'",
3269
                         backing_fmt->value.s);
3270
            ret = -EINVAL;
3271
            goto out;
3272
        }
3273
    }
3274

    
3275
    // The size for the image must always be specified, with one exception:
3276
    // If we are using a backing file, we can obtain the size from there
3277
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
3278
    if (size && size->value.n == -1) {
3279
        if (backing_file && backing_file->value.s) {
3280
            uint64_t size;
3281
            char buf[32];
3282

    
3283
            bs = bdrv_new("");
3284

    
3285
            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3286
            if (ret < 0) {
3287
                error_report("Could not open '%s'", backing_file->value.s);
3288
                goto out;
3289
            }
3290
            bdrv_get_geometry(bs, &size);
3291
            size *= 512;
3292

    
3293
            snprintf(buf, sizeof(buf), "%" PRId64, size);
3294
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3295
        } else {
3296
            error_report("Image creation needs a size parameter");
3297
            ret = -EINVAL;
3298
            goto out;
3299
        }
3300
    }
3301

    
3302
    printf("Formatting '%s', fmt=%s ", filename, fmt);
3303
    print_option_parameters(param);
3304
    puts("");
3305

    
3306
    ret = bdrv_create(drv, filename, param);
3307

    
3308
    if (ret < 0) {
3309
        if (ret == -ENOTSUP) {
3310
            error_report("Formatting or formatting option not supported for "
3311
                         "file format '%s'", fmt);
3312
        } else if (ret == -EFBIG) {
3313
            error_report("The image size is too large for file format '%s'",
3314
                         fmt);
3315
        } else {
3316
            error_report("%s: error while creating %s: %s", filename, fmt,
3317
                         strerror(-ret));
3318
        }
3319
    }
3320

    
3321
out:
3322
    free_option_parameters(create_options);
3323
    free_option_parameters(param);
3324

    
3325
    if (bs) {
3326
        bdrv_delete(bs);
3327
    }
3328

    
3329
    return ret;
3330
}