root / block.c @ 57c83dac

/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
} BdrvRequestFlags;

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
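
/*
 * Block the calling coroutine until this request fits within the configured
 * bps/iops limits: it is queued on throttled_reqs and the block timer is
 * armed to wake it once enough of the current slice has elapsed.
 */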
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are kept in FIFO order. The next throttled request is not
     * dequeued until the current request has been allowed to be serviced, so
     * if the current request still exceeds the limits it is re-inserted at
     * the head of the queue and all requests behind it stay queued.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* If filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
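
/*
 * Example: path_combine(dest, sz, "/images/base.qcow2", "snap.qcow2")
 * produces "/images/snap.qcow2", while an absolute filename such as
 * "/images/other.qcow2" is copied to dest unchanged.
 */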

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
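
/*
 * Cache mode summary (mode string -> flags set above):
 *   none/off     BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   directsync   BDRV_O_NOCACHE
 *   writeback    BDRV_O_CACHE_WB
 *   unsafe       BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   writethrough (default, no cache flags)
 */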

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}

void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NUL terminate the device_name to prevent double removal */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
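
/*
 * A device model typically registers its callbacks like this (sketch only;
 * FooState and the foo_* callbacks are hypothetical):
 *
 *     static const BlockDevOps foo_block_ops = {
 *         .change_media_cb = foo_change_media_cb,
 *         .is_tray_open    = foo_is_tray_open,
 *     };
 *     bdrv_set_dev_ops(foo->bs, &foo_block_ops, foo);
 */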

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        return -EBUSY;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    /* pstrcpy (unlike strncpy) guarantees NUL termination */
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
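
/*
 * Example: with a 64 KiB cluster size (128 sectors), a request for sectors
 * [130, 140) is rounded to [128, 256): *cluster_sector_num = 128 and
 * *cluster_nb_sectors = 128.
 */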

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /*        aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
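
/*
 * rwco.ret stays NOT_DONE until bdrv_rw_co_entry() has run to completion in
 * its coroutine, so the loop above pumps AIO completions with
 * qemu_aio_wait() until the request finishes.
 */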

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
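
/*
 * The bitmap stores one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors, packed
 * into unsigned longs: chunk number n lives at bit
 * (n % (sizeof(unsigned long) * 8)) of word
 * dirty_bitmap[n / (sizeof(unsigned long) * 8)], and dirty_count tracks the
 * number of dirty chunks.
 */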

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write to a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
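
/*
 * Example of the unaligned read-modify-write path above: a 100-byte write at
 * offset 510 touches sectors 0 and 1; the first 2 bytes go through the
 * tmp_buf head path on sector 0 and the remaining 98 bytes through the tail
 * path on sector 1.
 */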

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}

static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}

/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }
    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight++;
    }

    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight--;
    }

    return ret;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}

/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}

/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}

/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}

1767
struct partition {
1768
        uint8_t boot_ind;           /* 0x80 - active */
1769
        uint8_t head;               /* starting head */
1770
        uint8_t sector;             /* starting sector */
1771
        uint8_t cyl;                /* starting cylinder */
1772
        uint8_t sys_ind;            /* What partition type */
1773
        uint8_t end_head;           /* end head */
1774
        uint8_t end_sector;         /* end sector */
1775
        uint8_t end_cyl;            /* end cylinder */
1776
        uint32_t start_sect;        /* starting sector counting from 0 */
1777
        uint32_t nr_sects;          /* nr of sectors in partition */
1778
} QEMU_PACKED;
1779

    
1780
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1781
static int guess_disk_lchs(BlockDriverState *bs,
1782
                           int *pcylinders, int *pheads, int *psectors)
1783
{
1784
    uint8_t buf[BDRV_SECTOR_SIZE];
1785
    int ret, i, heads, sectors, cylinders;
1786
    struct partition *p;
1787
    uint32_t nr_sects;
1788
    uint64_t nb_sectors;
1789

    
1790
    bdrv_get_geometry(bs, &nb_sectors);
1791

    
1792
    ret = bdrv_read(bs, 0, buf, 1);
1793
    if (ret < 0)
1794
        return -1;
1795
    /* test msdos magic */
1796
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1797
        return -1;
1798
    for(i = 0; i < 4; i++) {
1799
        p = ((struct partition *)(buf + 0x1be)) + i;
1800
        nr_sects = le32_to_cpu(p->nr_sects);
1801
        if (nr_sects && p->end_head) {
1802
            /* We make the assumption that the partition terminates on
1803
               a cylinder boundary */
1804
            heads = p->end_head + 1;
1805
            sectors = p->end_sector & 63;
1806
            if (sectors == 0)
1807
                continue;
1808
            cylinders = nb_sectors / (heads * sectors);
1809
            if (cylinders < 1 || cylinders > 16383)
1810
                continue;
1811
            *pheads = heads;
1812
            *psectors = sectors;
1813
            *pcylinders = cylinders;
1814
#if 0
1815
            printf("guessed geometry: LCHS=%d %d %d\n",
1816
                   cylinders, heads, sectors);
1817
#endif
1818
            return 0;
1819
        }
1820
    }
1821
    return -1;
1822
}
1823

    
1824
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1825
{
1826
    int translation, lba_detected = 0;
1827
    int cylinders, heads, secs;
1828
    uint64_t nb_sectors;
1829

    
1830
    /* if a geometry hint is available, use it */
1831
    bdrv_get_geometry(bs, &nb_sectors);
1832
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1833
    translation = bdrv_get_translation_hint(bs);
1834
    if (cylinders != 0) {
1835
        *pcyls = cylinders;
1836
        *pheads = heads;
1837
        *psecs = secs;
1838
    } else {
1839
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1840
            if (heads > 16) {
1841
                /* if heads > 16, it means that a BIOS LBA
1842
                   translation was active, so the default
1843
                   hardware geometry is OK */
1844
                lba_detected = 1;
1845
                goto default_geometry;
1846
            } else {
1847
                *pcyls = cylinders;
1848
                *pheads = heads;
1849
                *psecs = secs;
1850
                /* disable any translation to be in sync with
1851
                   the logical geometry */
1852
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1853
                    bdrv_set_translation_hint(bs,
1854
                                              BIOS_ATA_TRANSLATION_NONE);
1855
                }
1856
            }
1857
        } else {
1858
        default_geometry:
1859
            /* if no geometry, use a standard physical disk geometry */
1860
            cylinders = nb_sectors / (16 * 63);
1861

    
1862
            if (cylinders > 16383)
1863
                cylinders = 16383;
1864
            else if (cylinders < 2)
1865
                cylinders = 2;
1866
            *pcyls = cylinders;
1867
            *pheads = 16;
1868
            *psecs = 63;
1869
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1870
                if ((*pcyls * *pheads) <= 131072) {
1871
                    bdrv_set_translation_hint(bs,
1872
                                              BIOS_ATA_TRANSLATION_LARGE);
1873
                } else {
1874
                    bdrv_set_translation_hint(bs,
1875
                                              BIOS_ATA_TRANSLATION_LBA);
1876
                }
1877
            }
1878
        }
1879
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1880
    }
1881
}
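
/* Worked example (illustrative): a 1 GiB disk has 2097152 sectors, so the
 * default-geometry branch above yields cylinders = 2097152 / (16 * 63)
 * = 2080, i.e. CHS 2080/16/63.  The 16383-cylinder clamp only matters for
 * disks larger than roughly 7.8 GiB (16383 * 16 * 63 sectors).
 */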

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}

/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};

void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}

int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
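
/* Usage sketch (illustrative): unlocking an encrypted image before any
 * I/O is issued.  The passphrase literal is a placeholder.
 *
 *     if (bdrv_key_required(bs)) {
 *         if (bdrv_set_key(bs, "passphrase") < 0) {
 *             // wrong key, or the driver cannot accept one
 *         }
 *     }
 */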

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}

typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}

/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
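
/* Usage sketch (illustrative): walking an image's allocation map by
 * advancing 'pnum' sectors per query, as described above.
 *
 *     int64_t sector = 0;
 *     int pnum;
 *
 *     while (sector < bs->total_sectors) {
 *         int allocated = bdrv_is_allocated(bs, sector,
 *                                           bs->total_sectors - sector,
 *                                           &pnum);
 *         // [sector, sector + pnum) is uniformly allocated/unallocated
 *         sector += pnum;
 *     }
 */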

void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
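
/* Illustrative event payload as seen by a QMP client (the device name is
 * hypothetical); QEVENT_BLOCK_IO_ERROR is emitted as "BLOCK_IO_ERROR":
 *
 *     { "event": "BLOCK_IO_ERROR",
 *       "data": { "device": "ide0-hd0", "action": "stop",
 *                 "operation": "write" } }
 */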

BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

/* Consider exposing this as a full-fledged QMP command */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}

BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}

void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}

int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}

int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    drv->bdrv_debug_event(bs, event);
}

/**************************************************************/
/* handling of snapshots */

int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}

int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}

BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}

int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}

int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}

int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}

int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}

int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}

BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    if (!bs->drv) {
        return NULL;
    }

    if (bs->backing_hd) {
        if (strcmp(bs->backing_file, backing_file) == 0) {
            return bs->backing_hd;
        } else {
            return bdrv_find_backing_image(bs->backing_hd, backing_file);
        }
    }

    return NULL;
}

#define NB_SUFFIXES 4

char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for(i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
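
/* Worked examples (illustrative): 512 prints as "512" (<= 999); 1536
 * prints as "1.5K" (1536 < 10 * 1024, fractional branch); 512000 prints
 * as "500K" ((512000 + 512) / 1024, rounding branch); 1048576 prints as
 * "1.0M" once base has advanced to 1024 * 1024.
 */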

char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}

/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}

BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}

    
2712

    
2713
typedef struct MultiwriteCB {
2714
    int error;
2715
    int num_requests;
2716
    int num_callbacks;
2717
    struct {
2718
        BlockDriverCompletionFunc *cb;
2719
        void *opaque;
2720
        QEMUIOVector *free_qiov;
2721
        void *free_buf;
2722
    } callbacks[];
2723
} MultiwriteCB;
2724

    
2725
static void multiwrite_user_cb(MultiwriteCB *mcb)
2726
{
2727
    int i;
2728

    
2729
    for (i = 0; i < mcb->num_callbacks; i++) {
2730
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2731
        if (mcb->callbacks[i].free_qiov) {
2732
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2733
        }
2734
        g_free(mcb->callbacks[i].free_qiov);
2735
        qemu_vfree(mcb->callbacks[i].free_buf);
2736
    }
2737
}
2738

    
2739
static void multiwrite_cb(void *opaque, int ret)
2740
{
2741
    MultiwriteCB *mcb = opaque;
2742

    
2743
    trace_multiwrite_cb(mcb, ret);
2744

    
2745
    if (ret < 0 && !mcb->error) {
2746
        mcb->error = ret;
2747
    }
2748

    
2749
    mcb->num_requests--;
2750
    if (mcb->num_requests == 0) {
2751
        multiwrite_user_cb(mcb);
2752
        g_free(mcb);
2753
    }
2754
}
2755

    
2756
static int multiwrite_req_compare(const void *a, const void *b)
2757
{
2758
    const BlockRequest *req1 = a, *req2 = b;
2759

    
2760
    /*
2761
     * Note that we can't simply subtract req2->sector from req1->sector
2762
     * here as that could overflow the return value.
2763
     */
2764
    if (req1->sector > req2->sector) {
2765
        return 1;
2766
    } else if (req1->sector < req2->sector) {
2767
        return -1;
2768
    } else {
2769
        return 0;
2770
    }
2771
}
2772

    
2773
/*
2774
 * Takes a bunch of requests and tries to merge them. Returns the number of
2775
 * requests that remain after merging.
2776
 */
2777
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2778
    int num_reqs, MultiwriteCB *mcb)
2779
{
2780
    int i, outidx;
2781

    
2782
    // Sort requests by start sector
2783
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2784

    
2785
    // Check if adjacent requests touch the same clusters. If so, combine them,
2786
    // filling up gaps with zero sectors.
2787
    outidx = 0;
2788
    for (i = 1; i < num_reqs; i++) {
2789
        int merge = 0;
2790
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2791

    
2792
        // This handles the cases that are valid for all block drivers, namely
2793
        // exactly sequential writes and overlapping writes.
2794
        if (reqs[i].sector <= oldreq_last) {
2795
            merge = 1;
2796
        }
2797

    
2798
        // The block driver may decide that it makes sense to combine requests
2799
        // even if there is a gap of some sectors between them. In this case,
2800
        // the gap is filled with zeros (therefore only applicable for yet
2801
        // unused space in format like qcow2).
2802
        if (!merge && bs->drv->bdrv_merge_requests) {
2803
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2804
        }
2805

    
2806
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2807
            merge = 0;
2808
        }
2809

    
2810
        if (merge) {
2811
            size_t size;
2812
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2813
            qemu_iovec_init(qiov,
2814
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2815

    
2816
            // Add the first request to the merged one. If the requests are
2817
            // overlapping, drop the last sectors of the first request.
2818
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2819
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2820

    
2821
            // We might need to add some zeros between the two requests
2822
            if (reqs[i].sector > oldreq_last) {
2823
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2824
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2825
                memset(buf, 0, zero_bytes);
2826
                qemu_iovec_add(qiov, buf, zero_bytes);
2827
                mcb->callbacks[i].free_buf = buf;
2828
            }
2829

    
2830
            // Add the second request
2831
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2832

    
2833
            reqs[outidx].nb_sectors = qiov->size >> 9;
2834
            reqs[outidx].qiov = qiov;
2835

    
2836
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2837
        } else {
2838
            outidx++;
2839
            reqs[outidx].sector     = reqs[i].sector;
2840
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2841
            reqs[outidx].qiov       = reqs[i].qiov;
2842
        }
2843
    }
2844

    
2845
    return outidx + 1;
2846
}
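
/* Worked example (illustrative): after sorting, a request for sectors
 * [0, 8) and one for [10, 14) with a driver whose bdrv_merge_requests
 * accepts the gap become a single request for [0, 14): the first qiov,
 * then (10 - 8) << 9 == 1024 zero bytes, then the second qiov.
 */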

/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. On error, this function returns -1, and any of the requests
 * may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergeable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
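
/* Usage sketch (illustrative): submitting two prepared requests and
 * telling never-submitted requests (error set, no callback coming) apart
 * from pending ones.
 *
 *     BlockRequest reqs[2];
 *     int i;
 *     // ... fill in sector, nb_sectors, qiov, cb and opaque for both ...
 *     if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *         for (i = 0; i < 2; i++) {
 *             if (reqs[i].error) {
 *                 // never submitted; its callback will not run
 *             }
 *         }
 *     }
 */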

void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}

/* block I/O throttling */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have already been read/written,
     *             obtained from the history statistics.
     * bytes_res:  the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: the total time needed to
     *             complete reading/writing all of the data.
     */
    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limit, bs->slice_end needs
     * to be extended so that the current statistics can be kept until the
     * timer fires; the factor is increased and tuned based on experiment.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
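
/* Worked example (illustrative): with bps_limit = 1048576 (1 MiB/s) and a
 * slice that works out to 0.1 s, bytes_limit = 104857.6.  If bytes_base is
 * 90000 and a 64-sector request arrives (bytes_res = 64 * 512 = 32768),
 * then 90000 + 32768 > 104857.6, so the request must wait roughly
 * 122768 / 1048576 - elapsed_time seconds before dispatch.
 */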

static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}

static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        bs->slice_end = now + bs->slice_time;
    } else {
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                      elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}

/**************************************************************/
/* async block device emulation */

typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;

static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};

static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}

typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH* bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};

static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}

/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}

static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}

static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}

void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}

void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
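
/* Design note with a minimal sketch (illustrative): the pool keeps a
 * singly linked free list threaded through acb->next, so a release/get
 * pair reuses the AIOCB instead of reallocating.  'my_pool' stands in for
 * any driver-defined AIOPool:
 *
 *     BlockDriverAIOCB *acb = qemu_aio_get(&my_pool, bs, cb, opaque);
 *     // ... run the request to completion ...
 *     qemu_aio_release(acb);   // pushed back onto my_pool.free_aiocb
 */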

/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}

static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}
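
/* Control-flow note (descriptive): bdrv_co_io_em() bridges the AIO and
 * coroutine worlds.  The calling coroutine submits the request and yields;
 * when the driver completes it, bdrv_co_io_em_complete() stores the return
 * code and re-enters the coroutine, which resumes right after
 * qemu_coroutine_yield() and returns co.ret.
 */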

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}

static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
3429

    
3430
void bdrv_invalidate_cache(BlockDriverState *bs)
3431
{
3432
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3433
        bs->drv->bdrv_invalidate_cache(bs);
3434
    }
3435
}
3436

    
3437
void bdrv_invalidate_cache_all(void)
3438
{
3439
    BlockDriverState *bs;
3440

    
3441
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
3442
        bdrv_invalidate_cache(bs);
3443
    }
3444
}
3445

    
3446
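/*
 * Synchronous wrapper around bdrv_co_flush(). From coroutine context the
 * entry function is called directly; otherwise a new coroutine is entered
 * and the event loop is pumped until the NOT_DONE sentinel in RwCo has been
 * replaced with the real return value. An illustrative caller (hypothetical
 * device code, not taken from this file):
 *
 *     if (bdrv_flush(bs) < 0) {
 *         ... report or handle the error as the device model requires ...
 *     }
 */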
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}

static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}

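/*
 * Discard (TRIM/UNMAP) a sector range. After the usual sanity checks the
 * request is routed like flush: the native bdrv_co_discard callback if the
 * driver has one, then the AIO variant bridged through
 * bdrv_co_io_em_complete(), and finally a silent no-op, since discard is
 * only an optimization hint.
 */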
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return 0;
    }
    if (!drv->bdrv_is_inserted) {
        return 1;
    }
    return drv->bdrv_is_inserted(bs);
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, int eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}

/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl) {
        return drv->bdrv_ioctl(bs, req, buf);
    }
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl) {
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    }
    return NULL;
}

void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}

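/*
 * Allocate or free the migration dirty bitmap. One bit covers one chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors, so the byte count computed below is
 * ceil(nb_sectors / (BDRV_SECTORS_PER_DIRTY_CHUNK * 8)). For illustration,
 * assuming a chunk size of 2048 sectors (1 MiB of guest data per bit), a
 * 10 GiB image has 10240 chunks and therefore needs a 1280-byte bitmap.
 */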
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}

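/*
 * Test one sector's dirty bit: map the sector to its chunk, index the
 * unsigned long array with chunk / (sizeof(unsigned long) * 8), and test
 * bit chunk % (sizeof(unsigned long) * 8) within that word. Sectors beyond
 * the device length always read back as clean.
 */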
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}

void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}

void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}

/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}

int bdrv_img_create(const char *filename, const char *fmt,
                    const char *base_filename, const char *base_fmt,
                    char *options, uint64_t img_size, int flags)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        ret = -EINVAL;
        goto out;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", filename);
        ret = -EINVAL;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_report("Backing file not supported for file format '%s'",
                         fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_report("Trying to create an image with the "
                         "same filename as the backing file");
            ret = -EINVAL;
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_report("Unknown backing file format '%s'",
                         backing_fmt->value.s);
            ret = -EINVAL;
            goto out;
        }
    }

    /* The size for the image must always be specified, with one exception:
     * if we are using a backing file, we can obtain the size from there. */
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t backing_size;
            char buf[32];

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
            if (ret < 0) {
                error_report("Could not open '%s'", backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &backing_size);
            backing_size *= 512;

            snprintf(buf, sizeof(buf), "%" PRIu64, backing_size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_report("Image creation needs a size parameter");
            ret = -EINVAL;
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);

    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting or formatting option not supported for "
                         "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         fmt);
        } else {
            error_report("%s: error while creating %s: %s", filename, fmt,
                         strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }

    return ret;
}

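/*
 * Long-running jobs (e.g. image streaming) attach to a BlockDriverState
 * one at a time: creation returns NULL if a job is already running or the
 * device is otherwise in use, and block_job_complete() below invokes the
 * caller's callback and releases the device again. A job's main loop is
 * expected to poll block_job_is_cancelled() and finish early once
 * block_job_cancel() has been called.
 */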
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
                       BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockJob *job;

    if (bs->job || bdrv_in_use(bs)) {
        return NULL;
    }
    bdrv_set_in_use(bs, 1);

    job = g_malloc0(job_type->instance_size);
    job->job_type      = job_type;
    job->bs            = bs;
    job->cb            = cb;
    job->opaque        = opaque;
    bs->job = job;
    return job;
}

void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);
    bdrv_set_in_use(bs, 0);
}

int block_job_set_speed(BlockJob *job, int64_t value)
{
    if (!job->job_type->set_speed) {
        return -ENOTSUP;
    }
    return job->job_type->set_speed(job, value);
}

void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
}

bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}