Statistics
| Branch: | Revision:

root / block.c @ 384acbf4

History | View | Annotate | Download (87 kB)

1
/*
2
 * QEMU System Emulator block driver
3
 *
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "config-host.h"
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "qemu-objects.h"
31
#include "qemu-coroutine.h"
32

    
33
#ifdef CONFIG_BSD
34
#include <sys/types.h>
35
#include <sys/stat.h>
36
#include <sys/ioctl.h>
37
#include <sys/queue.h>
38
#ifndef __DragonFly__
39
#include <sys/disk.h>
40
#endif
41
#endif
42

    
43
#ifdef _WIN32
44
#include <windows.h>
45
#endif
46

    
47
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
48
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
49
        BlockDriverCompletionFunc *cb, void *opaque);
50
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
51
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52
        BlockDriverCompletionFunc *cb, void *opaque);
53
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
54
        BlockDriverCompletionFunc *cb, void *opaque);
55
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
56
        BlockDriverCompletionFunc *cb, void *opaque);
57
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
58
                        uint8_t *buf, int nb_sectors);
59
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
60
                         const uint8_t *buf, int nb_sectors);
61
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
62
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63
        BlockDriverCompletionFunc *cb, void *opaque);
64
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
65
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66
        BlockDriverCompletionFunc *cb, void *opaque);
67
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68
                                         int64_t sector_num, int nb_sectors,
69
                                         QEMUIOVector *iov);
70
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71
                                         int64_t sector_num, int nb_sectors,
72
                                         QEMUIOVector *iov);
73

    
74
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
75
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
76

    
77
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
78
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
79

    
80
/* The device to use for VM snapshots */
81
static BlockDriverState *bs_snapshots;
82

    
83
/* If non-zero, use only whitelisted block drivers */
84
static int use_bdrv_whitelist;
85

    
86
#ifdef _WIN32
87
/*
 * Return 1 if 'filename' begins with an ASCII letter followed by ':'
 * (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_ascii_letter;

    if (letter >= 'a' && letter <= 'z') {
        is_ascii_letter = 1;
    } else {
        is_ascii_letter = (letter >= 'A' && letter <= 'Z');
    }

    /* filename[1] is only read once filename[0] is known to be a letter,
       so an empty string is never indexed past its terminator. */
    return is_ascii_letter && filename[1] == ':';
}
93

    
94
/*
 * Return 1 if 'filename' names a whole drive or device: either a bare
 * drive specification ("d:") or a name starting with "\\.\" or "//./".
 * Return 0 for everything else.
 */
int is_windows_drive(const char *filename)
{
    /* Exactly "<letter>:" with nothing after the colon. */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Device prefix, written with backslashes... */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    /* ...or with forward slashes. */
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
104
#endif
105

    
106
/*
 * Check whether 'path' carries a "<protocol>:" prefix.
 *
 * Any ':' anywhere in the string counts.  On Windows, drive and device
 * names are excluded first because they are plain file names.
 *
 * Returns 1 if a protocol separator is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
118

    
119
/*
 * Return non-zero if 'path' is absolute.
 *
 * Everything up to and including the first ':' is skipped, so
 * "proto:/dir/file" is judged by the part after the colon.  The remaining
 * text is absolute when it starts with '/' (or '\\' on Windows).
 */
int path_is_absolute(const char *path)
{
    const char *start = path;
    const char *colon;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    if (colon != NULL) {
        start = colon + 1;
    }

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
138

    
139
/*
 * Build in 'dest' (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of 'filename' relative to 'base_path'.
 *
 * If 'filename' is absolute it is copied unchanged.  Otherwise the
 * directory part of 'base_path' is kept and 'filename' is appended to it.
 * The directory part is everything up to and including the last path
 * separator or, when that comes later, the first ':'.  URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (start of string if none). */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator (start if none). */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* The directory prefix ends at whichever of the two is further right. */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
182

    
183
/*
 * Register a block driver.
 *
 * Callbacks the driver leaves out are filled in with emulation helpers so
 * that it offers synchronous, AIO and coroutine read/write entry points
 * afterwards, then the driver is added to the head of bdrv_drivers.
 */
void bdrv_register(BlockDriver *bdrv)
{
    const int has_coroutine_io = (bdrv->bdrv_co_readv != NULL);

    if (has_coroutine_io) {
        /* Coroutine driver: AIO is emulated by coroutines and the
           synchronous calls are emulated by AIO. */
        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
        bdrv->bdrv_read = bdrv_read_em;
        bdrv->bdrv_write = bdrv_write_em;
    } else {
        /* No coroutine support: install the coroutine wrappers. */
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        if (bdrv->bdrv_aio_readv == NULL) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        } else if (bdrv->bdrv_read == NULL) {
            /* add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    }

    if (bdrv->bdrv_aio_flush == NULL) {
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
211

    
212
/* create a new block device (by default it is empty) */
213
BlockDriverState *bdrv_new(const char *device_name)
214
{
215
    BlockDriverState *bs;
216

    
217
    bs = qemu_mallocz(sizeof(BlockDriverState));
218
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
219
    if (device_name[0] != '\0') {
220
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
221
    }
222
    return bs;
223
}
224

    
225
BlockDriver *bdrv_find_format(const char *format_name)
226
{
227
    BlockDriver *drv1;
228
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
229
        if (!strcmp(drv1->format_name, format_name)) {
230
            return drv1;
231
        }
232
    }
233
    return NULL;
234
}
235

    
236
/*
 * Return 1 if 'drv' may be used according to the build-time whitelist,
 * 0 otherwise.  An empty whitelist permits every driver.
 */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    /* NOTE(review): the loop below requires CONFIG_BDRV_WHITELIST to expand
       to a NULL-terminated initializer list - confirm in the build config. */
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    if (whitelist[0] == NULL) {
        return 1;               /* no whitelist, anything goes */
    }

    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }

    return 0;
}
253

    
254
/*
 * Like bdrv_find_format(), but returns NULL when the driver exists and is
 * not on the whitelist.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv == NULL || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
259

    
260
int bdrv_create(BlockDriver *drv, const char* filename,
261
    QEMUOptionParameter *options)
262
{
263
    if (!drv->bdrv_create)
264
        return -ENOTSUP;
265

    
266
    return drv->bdrv_create(filename, options);
267
}
268

    
269
/*
 * Create 'filename' using the protocol driver that bdrv_find_protocol()
 * selects for it.  Returns -ENOENT when no protocol driver is found,
 * otherwise the result of bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto_drv = bdrv_find_protocol(filename);

    if (!proto_drv) {
        return -ENOENT;
    }

    return bdrv_create(proto_drv, filename, options);
}
280

    
281
#ifdef _WIN32
282
void get_tmp_filename(char *filename, int size)
283
{
284
    char temp_dir[MAX_PATH];
285

    
286
    GetTempPath(MAX_PATH, temp_dir);
287
    GetTempFileName(temp_dir, "qem", 0, filename);
288
}
289
#else
290
/*
 * Store the name of a new temporary file in 'filename' (at most 'size'
 * bytes).  The file is created under $TMPDIR, or /tmp when TMPDIR is
 * unset, and is closed again before returning.
 *
 * The function cannot report failure: if mkstemp() fails, 'filename' is
 * left as mkstemp() left it and no file has been created by this call.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd >= 0) {
        /* Only close a descriptor that was actually opened. */
        close(fd);
    }
}
302
#endif
303

    
304
/*
305
 * Detect host devices. By convention, /dev/cdrom[N] is always
306
 * recognized as a host CDROM.
307
 */
308
static BlockDriver *find_hdev_driver(const char *filename)
309
{
310
    int score_max = 0, score;
311
    BlockDriver *drv = NULL, *d;
312

    
313
    QLIST_FOREACH(d, &bdrv_drivers, list) {
314
        if (d->bdrv_probe_device) {
315
            score = d->bdrv_probe_device(filename);
316
            if (score > score_max) {
317
                score_max = score;
318
                drv = d;
319
            }
320
        }
321
    }
322

    
323
    return drv;
324
}
325

    
326
BlockDriver *bdrv_find_protocol(const char *filename)
327
{
328
    BlockDriver *drv1;
329
    char protocol[128];
330
    int len;
331
    const char *p;
332

    
333
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
334

    
335
    /*
336
     * XXX(hch): we really should not let host device detection
337
     * override an explicit protocol specification, but moving this
338
     * later breaks access to device names with colons in them.
339
     * Thanks to the brain-dead persistent naming schemes on udev-
340
     * based Linux systems those actually are quite common.
341
     */
342
    drv1 = find_hdev_driver(filename);
343
    if (drv1) {
344
        return drv1;
345
    }
346

    
347
    if (!path_has_protocol(filename)) {
348
        return bdrv_find_format("file");
349
    }
350
    p = strchr(filename, ':');
351
    assert(p != NULL);
352
    len = p - filename;
353
    if (len > sizeof(protocol) - 1)
354
        len = sizeof(protocol) - 1;
355
    memcpy(protocol, filename, len);
356
    protocol[len] = '\0';
357
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
358
        if (drv1->protocol_name &&
359
            !strcmp(drv1->protocol_name, protocol)) {
360
            return drv1;
361
        }
362
    }
363
    return NULL;
364
}
365

    
366
/*
 * Probe 'filename' to find its image format driver.
 *
 * The file is opened through its protocol and up to 2048 leading bytes
 * are handed to every driver's bdrv_probe callback; the driver with the
 * highest strictly positive score wins.  scsi-generic devices and drives
 * without a medium are not read and get the "raw" driver.
 *
 * On return *pdrv is the chosen driver, or NULL on failure.  The return
 * value is negative on error (-ENOENT when no driver was found) and
 * otherwise the non-negative result of the open or read that preceded
 * the decision.
 */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    BlockDriverState *probe_bs;
    BlockDriver *best = NULL;
    BlockDriver *cur;
    uint8_t header[2048];
    int best_score = 0;
    int ret;

    ret = bdrv_file_open(&probe_bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (probe_bs->sg || !bdrv_is_inserted(probe_bs)) {
        bdrv_delete(probe_bs);
        best = bdrv_find_format("raw");
        *pdrv = best;
        return best ? ret : -ENOENT;
    }

    ret = bdrv_pread(probe_bs, 0, header, sizeof(header));
    bdrv_delete(probe_bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    QLIST_FOREACH(cur, &bdrv_drivers, list) {
        int score;

        if (!cur->bdrv_probe) {
            continue;
        }

        /* 'ret' is the number of header bytes returned by bdrv_pread(). */
        score = cur->bdrv_probe(header, ret, filename);
        if (score > best_score) {
            best_score = score;
            best = cur;
        }
    }

    *pdrv = best;
    return best ? ret : -ENOENT;
}
414

    
415
/**
416
 * Set the current 'total_sectors' value
417
 */
418
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
419
{
420
    BlockDriver *drv = bs->drv;
421

    
422
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
423
    if (bs->sg)
424
        return 0;
425

    
426
    /* query actual device if possible, otherwise just trust the hint */
427
    if (drv->bdrv_getlength) {
428
        int64_t length = drv->bdrv_getlength(bs);
429
        if (length < 0) {
430
            return length;
431
        }
432
        hint = length >> BDRV_SECTOR_BITS;
433
    }
434

    
435
    bs->total_sectors = hint;
436
    return 0;
437
}
438

    
439
/*
440
 * Common part for opening disk images and files
441
 */
442
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
443
    int flags, BlockDriver *drv)
444
{
445
    int ret, open_flags;
446

    
447
    assert(drv != NULL);
448

    
449
    bs->file = NULL;
450
    bs->total_sectors = 0;
451
    bs->encrypted = 0;
452
    bs->valid_key = 0;
453
    bs->open_flags = flags;
454
    /* buffer_alignment defaulted to 512, drivers can change this value */
455
    bs->buffer_alignment = 512;
456

    
457
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
458

    
459
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
460
        return -ENOTSUP;
461
    }
462

    
463
    bs->drv = drv;
464
    bs->opaque = qemu_mallocz(drv->instance_size);
465

    
466
    if (flags & BDRV_O_CACHE_WB)
467
        bs->enable_write_cache = 1;
468

    
469
    /*
470
     * Clear flags that are internal to the block layer before opening the
471
     * image.
472
     */
473
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
474

    
475
    /*
476
     * Snapshots should be writable.
477
     */
478
    if (bs->is_temporary) {
479
        open_flags |= BDRV_O_RDWR;
480
    }
481

    
482
    /* Open the image, either directly or using a protocol */
483
    if (drv->bdrv_file_open) {
484
        ret = drv->bdrv_file_open(bs, filename, open_flags);
485
    } else {
486
        ret = bdrv_file_open(&bs->file, filename, open_flags);
487
        if (ret >= 0) {
488
            ret = drv->bdrv_open(bs, open_flags);
489
        }
490
    }
491

    
492
    if (ret < 0) {
493
        goto free_and_fail;
494
    }
495

    
496
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
497

    
498
    ret = refresh_total_sectors(bs, bs->total_sectors);
499
    if (ret < 0) {
500
        goto free_and_fail;
501
    }
502

    
503
#ifndef _WIN32
504
    if (bs->is_temporary) {
505
        unlink(filename);
506
    }
507
#endif
508
    return 0;
509

    
510
free_and_fail:
511
    if (bs->file) {
512
        bdrv_delete(bs->file);
513
        bs->file = NULL;
514
    }
515
    qemu_free(bs->opaque);
516
    bs->opaque = NULL;
517
    bs->drv = NULL;
518
    return ret;
519
}
520

    
521
/*
522
 * Opens a file using a protocol (file, host_device, nbd, ...)
523
 */
524
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
525
{
526
    BlockDriverState *bs;
527
    BlockDriver *drv;
528
    int ret;
529

    
530
    drv = bdrv_find_protocol(filename);
531
    if (!drv) {
532
        return -ENOENT;
533
    }
534

    
535
    bs = bdrv_new("");
536
    ret = bdrv_open_common(bs, filename, flags, drv);
537
    if (ret < 0) {
538
        bdrv_delete(bs);
539
        return ret;
540
    }
541
    bs->growable = 1;
542
    *pbs = bs;
543
    return 0;
544
}
545

    
546
/*
547
 * Opens a disk image (raw, qcow2, vmdk, ...)
548
 */
549
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
550
              BlockDriver *drv)
551
{
552
    int ret;
553

    
554
    if (flags & BDRV_O_SNAPSHOT) {
555
        BlockDriverState *bs1;
556
        int64_t total_size;
557
        int is_protocol = 0;
558
        BlockDriver *bdrv_qcow2;
559
        QEMUOptionParameter *options;
560
        char tmp_filename[PATH_MAX];
561
        char backing_filename[PATH_MAX];
562

    
563
        /* if snapshot, we create a temporary backing file and open it
564
           instead of opening 'filename' directly */
565

    
566
        /* if there is a backing file, use it */
567
        bs1 = bdrv_new("");
568
        ret = bdrv_open(bs1, filename, 0, drv);
569
        if (ret < 0) {
570
            bdrv_delete(bs1);
571
            return ret;
572
        }
573
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
574

    
575
        if (bs1->drv && bs1->drv->protocol_name)
576
            is_protocol = 1;
577

    
578
        bdrv_delete(bs1);
579

    
580
        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
581

    
582
        /* Real path is meaningless for protocols */
583
        if (is_protocol)
584
            snprintf(backing_filename, sizeof(backing_filename),
585
                     "%s", filename);
586
        else if (!realpath(filename, backing_filename))
587
            return -errno;
588

    
589
        bdrv_qcow2 = bdrv_find_format("qcow2");
590
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
591

    
592
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
593
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
594
        if (drv) {
595
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
596
                drv->format_name);
597
        }
598

    
599
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
600
        free_option_parameters(options);
601
        if (ret < 0) {
602
            return ret;
603
        }
604

    
605
        filename = tmp_filename;
606
        drv = bdrv_qcow2;
607
        bs->is_temporary = 1;
608
    }
609

    
610
    /* Find the right image format driver */
611
    if (!drv) {
612
        ret = find_image_format(filename, &drv);
613
    }
614

    
615
    if (!drv) {
616
        goto unlink_and_fail;
617
    }
618

    
619
    /* Open the image */
620
    ret = bdrv_open_common(bs, filename, flags, drv);
621
    if (ret < 0) {
622
        goto unlink_and_fail;
623
    }
624

    
625
    /* If there is a backing file, use it */
626
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
627
        char backing_filename[PATH_MAX];
628
        int back_flags;
629
        BlockDriver *back_drv = NULL;
630

    
631
        bs->backing_hd = bdrv_new("");
632

    
633
        if (path_has_protocol(bs->backing_file)) {
634
            pstrcpy(backing_filename, sizeof(backing_filename),
635
                    bs->backing_file);
636
        } else {
637
            path_combine(backing_filename, sizeof(backing_filename),
638
                         filename, bs->backing_file);
639
        }
640

    
641
        if (bs->backing_format[0] != '\0') {
642
            back_drv = bdrv_find_format(bs->backing_format);
643
        }
644

    
645
        /* backing files always opened read-only */
646
        back_flags =
647
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
648

    
649
        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
650
        if (ret < 0) {
651
            bdrv_close(bs);
652
            return ret;
653
        }
654
        if (bs->is_temporary) {
655
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
656
        } else {
657
            /* base image inherits from "parent" */
658
            bs->backing_hd->keep_read_only = bs->keep_read_only;
659
        }
660
    }
661

    
662
    if (!bdrv_key_required(bs)) {
663
        /* call the change callback */
664
        bs->media_changed = 1;
665
        if (bs->change_cb)
666
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
667
    }
668

    
669
    return 0;
670

    
671
unlink_and_fail:
672
    if (bs->is_temporary) {
673
        unlink(filename);
674
    }
675
    return ret;
676
}
677

    
678
/*
 * Close an open block device; does nothing when no driver is attached.
 *
 * Deletes the backing device, calls the driver's close callback, frees
 * the driver state, closes (but does not delete) bs->file and finally
 * reports a media change.  On Windows a temporary image file is removed
 * here.
 */
void bdrv_close(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }

    if (bs->backing_hd != NULL) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
    }

    bs->drv->bdrv_close(bs);
    qemu_free(bs->opaque);
#ifdef _WIN32
    if (bs->is_temporary) {
        unlink(bs->filename);
    }
#endif
    bs->opaque = NULL;
    bs->drv = NULL;

    if (bs->file != NULL) {
        bdrv_close(bs->file);
    }

    /* call the change callback */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}
708

    
709
void bdrv_close_all(void)
710
{
711
    BlockDriverState *bs;
712

    
713
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
714
        bdrv_close(bs);
715
    }
716
}
717

    
718
/* make a BlockDriverState anonymous by removing from bdrv_state list.
719
   Also, NULL terminate the device_name to prevent double remove */
720
void bdrv_make_anon(BlockDriverState *bs)
721
{
722
    if (bs->device_name[0] != '\0') {
723
        QTAILQ_REMOVE(&bdrv_states, bs, list);
724
    }
725
    bs->device_name[0] = '\0';
726
}
727

    
728
/*
 * Close and free a block device state, including its underlying
 * bs->file.  The state must not be attached to a device.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->peer == NULL);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file) {
        bdrv_delete(bs->file);
    }

    /* bdrv_close() clears bs_snapshots for a state that was open. */
    assert(bs_snapshots != bs);
    qemu_free(bs);
}
743

    
744
/*
 * Record 'qdev' as the device attached to 'bs'.
 * Returns 0 on success, or -EBUSY if a device is already attached.
 */
int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (bs->peer == NULL) {
        bs->peer = qdev;
        return 0;
    }

    return -EBUSY;
}
752

    
753
/*
 * Detach 'qdev' from 'bs' and clear the change callback.
 * 'qdev' must be the device that is currently attached.
 */
void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(bs->peer == qdev);

    bs->change_opaque = NULL;
    bs->change_cb = NULL;
    bs->peer = NULL;
}
760

    
761
/* Return the device attached to 'bs', or NULL if there is none. */
DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    DeviceState *attached = bs->peer;

    return attached;
}
765

    
766
/*
767
 * Run consistency checks on an image
768
 *
769
 * Returns 0 if the check could be completed (it doesn't mean that the image is
770
 * free of errors) or -errno when an internal error occurred. The results of the
771
 * check are stored in res.
772
 */
773
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
774
{
775
    if (bs->drv->bdrv_check == NULL) {
776
        return -ENOTSUP;
777
    }
778

    
779
    memset(res, 0, sizeof(*res));
780
    return bs->drv->bdrv_check(bs, res);
781
}
782

    
783
#define COMMIT_BUF_SECTORS 2048
784

    
785
/* commit COW file into the raw image */
786
int bdrv_commit(BlockDriverState *bs)
787
{
788
    BlockDriver *drv = bs->drv;
789
    BlockDriver *backing_drv;
790
    int64_t sector, total_sectors;
791
    int n, ro, open_flags;
792
    int ret = 0, rw_ret = 0;
793
    uint8_t *buf;
794
    char filename[1024];
795
    BlockDriverState *bs_rw, *bs_ro;
796

    
797
    if (!drv)
798
        return -ENOMEDIUM;
799
    
800
    if (!bs->backing_hd) {
801
        return -ENOTSUP;
802
    }
803

    
804
    if (bs->backing_hd->keep_read_only) {
805
        return -EACCES;
806
    }
807

    
808
    backing_drv = bs->backing_hd->drv;
809
    ro = bs->backing_hd->read_only;
810
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
811
    open_flags =  bs->backing_hd->open_flags;
812

    
813
    if (ro) {
814
        /* re-open as RW */
815
        bdrv_delete(bs->backing_hd);
816
        bs->backing_hd = NULL;
817
        bs_rw = bdrv_new("");
818
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
819
            backing_drv);
820
        if (rw_ret < 0) {
821
            bdrv_delete(bs_rw);
822
            /* try to re-open read-only */
823
            bs_ro = bdrv_new("");
824
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
825
                backing_drv);
826
            if (ret < 0) {
827
                bdrv_delete(bs_ro);
828
                /* drive not functional anymore */
829
                bs->drv = NULL;
830
                return ret;
831
            }
832
            bs->backing_hd = bs_ro;
833
            return rw_ret;
834
        }
835
        bs->backing_hd = bs_rw;
836
    }
837

    
838
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
839
    buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
840

    
841
    for (sector = 0; sector < total_sectors; sector += n) {
842
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
843

    
844
            if (bdrv_read(bs, sector, buf, n) != 0) {
845
                ret = -EIO;
846
                goto ro_cleanup;
847
            }
848

    
849
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
850
                ret = -EIO;
851
                goto ro_cleanup;
852
            }
853
        }
854
    }
855

    
856
    if (drv->bdrv_make_empty) {
857
        ret = drv->bdrv_make_empty(bs);
858
        bdrv_flush(bs);
859
    }
860

    
861
    /*
862
     * Make sure all data we wrote to the backing device is actually
863
     * stable on disk.
864
     */
865
    if (bs->backing_hd)
866
        bdrv_flush(bs->backing_hd);
867

    
868
ro_cleanup:
869
    qemu_free(buf);
870

    
871
    if (ro) {
872
        /* re-open as RO */
873
        bdrv_delete(bs->backing_hd);
874
        bs->backing_hd = NULL;
875
        bs_ro = bdrv_new("");
876
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
877
            backing_drv);
878
        if (ret < 0) {
879
            bdrv_delete(bs_ro);
880
            /* drive not functional anymore */
881
            bs->drv = NULL;
882
            return ret;
883
        }
884
        bs->backing_hd = bs_ro;
885
        bs->backing_hd->keep_read_only = 0;
886
    }
887

    
888
    return ret;
889
}
890

    
891
void bdrv_commit_all(void)
892
{
893
    BlockDriverState *bs;
894

    
895
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
896
        bdrv_commit(bs);
897
    }
898
}
899

    
900
/*
901
 * Return values:
902
 * 0        - success
903
 * -EINVAL  - backing format specified, but no file
904
 * -ENOSPC  - can't update the backing file because no space is left in the
905
 *            image file header
906
 * -ENOTSUP - format driver doesn't support changing the backing file
907
 */
908
int bdrv_change_backing_file(BlockDriverState *bs,
909
    const char *backing_file, const char *backing_fmt)
910
{
911
    BlockDriver *drv = bs->drv;
912

    
913
    if (drv->bdrv_change_backing_file != NULL) {
914
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
915
    } else {
916
        return -ENOTSUP;
917
    }
918
}
919

    
920
/*
 * Validate a byte range against the device.
 *
 * Returns -ENOMEDIUM when no medium is inserted.  Growable devices
 * accept any range.  Otherwise the range [offset, offset + size) must lie
 * within the current device length, else -EIO is returned.  Returns 0
 * when the request is acceptable.
 */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t image_len;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bs->growable) {
        return 0;
    }

    image_len = bdrv_getlength(bs);

    /* Evaluated left to right: image_len - offset is only computed once
       offset is known to lie in [0, image_len]. */
    if (offset < 0 || offset > image_len || image_len - offset < size) {
        return -EIO;
    }

    return 0;
}
941

    
942
/*
 * Validate a sector range against the device.
 *
 * Returns -EIO for a negative sector count or one whose size in bytes
 * does not fit in size_t; otherwise returns the result of
 * bdrv_check_byte_request() for the equivalent byte range.
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    int64_t nb_bytes;

    /* A negative count would otherwise be accepted on growable devices. */
    if (nb_sectors < 0) {
        return -EIO;
    }

    /* Multiply in 64 bits so the product cannot overflow 'int', then make
       sure it survives the conversion to size_t. */
    nb_bytes = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
    if ((int64_t)(size_t)nb_bytes != nb_bytes) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   (size_t)nb_bytes);
}
948

    
949
/* return < 0 if error. See bdrv_write() for the return codes */
950
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
951
              uint8_t *buf, int nb_sectors)
952
{
953
    BlockDriver *drv = bs->drv;
954

    
955
    if (!drv)
956
        return -ENOMEDIUM;
957
    if (bdrv_check_request(bs, sector_num, nb_sectors))
958
        return -EIO;
959

    
960
    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
961
}
962

    
963
/*
 * Mark (dirty != 0) or clear (dirty == 0) the dirty bit of every chunk
 * touched by the given sector range.  One bit covers
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors.  bs->dirty_count is adjusted only
 * for bits that actually change.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk, last_chunk;

    chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    last_chunk = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; chunk <= last_chunk; chunk++) {
        unsigned long idx = chunk / bits_per_word;
        unsigned long bit = chunk % bits_per_word;
        unsigned long mask = 1UL << bit;
        unsigned long word = bs->dirty_bitmap[idx];
        const int was_dirty = (word & mask) != 0;

        if (dirty && !was_dirty) {
            bs->dirty_count++;
            word |= mask;
        } else if (!dirty && was_dirty) {
            bs->dirty_count--;
            word &= ~mask;
        }

        bs->dirty_bitmap[idx] = word;
    }
}
990

    
991
/* Return < 0 if error. Important errors are:
992
  -EIO         generic I/O error (may happen for all errors)
993
  -ENOMEDIUM   No media inserted.
994
  -EINVAL      Invalid sector number or nb_sectors
995
  -EACCES      Trying to write a read-only device
996
*/
997
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
998
               const uint8_t *buf, int nb_sectors)
999
{
1000
    BlockDriver *drv = bs->drv;
1001
    if (!bs->drv)
1002
        return -ENOMEDIUM;
1003
    if (bs->read_only)
1004
        return -EACCES;
1005
    if (bdrv_check_request(bs, sector_num, nb_sectors))
1006
        return -EIO;
1007

    
1008
    if (bs->dirty_bitmap) {
1009
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1010
    }
1011

    
1012
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1013
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
1014
    }
1015

    
1016
    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1017
}
1018

    
1019
/*
 * Byte-granularity read built on top of bdrv_read().
 *
 * Reads count1 bytes starting at byte 'offset' into 'buf'. The request is
 * split into an unaligned head (via a one-sector bounce buffer), a run of
 * whole sectors read straight into the destination, and an unaligned tail
 * (bounce buffer again).
 *
 * Returns count1 on success or the negative value returned by bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    uint8_t *dst = buf;
    int remaining = count1;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int chunk, whole, ret;

    /* head: bytes needed to reach the next sector boundary */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, bounce + (offset & (BDRV_SECTOR_SIZE - 1)), chunk);
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        dst += chunk;
    }

    /* body: whole sectors go directly into the caller's buffer */
    whole = remaining >> BDRV_SECTOR_BITS;
    if (whole > 0) {
        ret = bdrv_read(bs, sector, dst, whole);
        if (ret < 0) {
            return ret;
        }
        sector += whole;
        chunk = whole << BDRV_SECTOR_BITS;
        dst += chunk;
        remaining -= chunk;
    }

    /* tail: leading part of the last sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, bounce, remaining);
    }
    return count1;
}
1063

    
1064
/*
 * Byte-granularity write built on top of bdrv_read()/bdrv_write().
 *
 * Writes count1 bytes from 'buf' at byte 'offset'. Partial sectors at the
 * head and tail are handled read-modify-write through a one-sector bounce
 * buffer; whole sectors in between are written directly from 'buf'.
 *
 * Returns count1 on success or the negative value returned by the failing
 * bdrv_read()/bdrv_write() call.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    const uint8_t *src = buf;
    int remaining = count1;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int chunk, whole, ret;

    /* head: bytes needed to reach the next sector boundary */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce + (offset & (BDRV_SECTOR_SIZE - 1)), src, chunk);
        ret = bdrv_write(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        src += chunk;
    }

    /* body: whole sectors are written straight from the caller's buffer */
    whole = remaining >> BDRV_SECTOR_BITS;
    if (whole > 0) {
        ret = bdrv_write(bs, sector, src, whole);
        if (ret < 0) {
            return ret;
        }
        sector += whole;
        chunk = whole << BDRV_SECTOR_BITS;
        src += chunk;
        remaining -= chunk;
    }

    /* tail: merge the remaining bytes into the start of the last sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce, src, remaining);
        ret = bdrv_write(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
    }
    return count1;
}
1112

    
1113
/*
1114
 * Writes to the file and ensures that no writes are reordered across this
1115
 * request (acts as a barrier)
1116
 *
1117
 * Returns 0 on success, -errno in error cases.
1118
 */
1119
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1120
    const void *buf, int count)
1121
{
1122
    int ret;
1123

    
1124
    ret = bdrv_pwrite(bs, offset, buf, count);
1125
    if (ret < 0) {
1126
        return ret;
1127
    }
1128

    
1129
    /* No flush needed for cache=writethrough, it uses O_DSYNC */
1130
    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1131
        bdrv_flush(bs);
1132
    }
1133

    
1134
    return 0;
1135
}
1136

    
1137
/*
 * Coroutine vectored read: trace the request, validate it, then hand it to
 * the driver's bdrv_co_readv callback.
 *
 * Returns -ENOMEDIUM when no driver is attached, -EIO when
 * bdrv_check_request() rejects the range, otherwise the driver's result.
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    if (drv == NULL) {
        return -ENOMEDIUM;
    }

    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}
1153

    
1154
/*
 * Coroutine vectored write: trace and validate the request, update the
 * dirty bitmap (if any) and the highest-written-sector statistic, then call
 * the driver's bdrv_co_writev callback.
 *
 * Returns -ENOMEDIUM without a driver, -EACCES on a read-only device, -EIO
 * when bdrv_check_request() rejects the range, otherwise the driver's result.
 */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    int64_t last_sector = sector_num + nb_sectors - 1;

    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < last_sector) {
        bs->wr_highest_sector = last_sector;
    }

    return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
1181

    
1182
/**
1183
 * Truncate file to 'offset' bytes (needed only for file protocols)
1184
 */
1185
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1186
{
1187
    BlockDriver *drv = bs->drv;
1188
    int ret;
1189
    if (!drv)
1190
        return -ENOMEDIUM;
1191
    if (!drv->bdrv_truncate)
1192
        return -ENOTSUP;
1193
    if (bs->read_only)
1194
        return -EACCES;
1195
    if (bdrv_in_use(bs))
1196
        return -EBUSY;
1197
    ret = drv->bdrv_truncate(bs, offset);
1198
    if (ret == 0) {
1199
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1200
        if (bs->change_cb) {
1201
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1202
        }
1203
    }
1204
    return ret;
1205
}
1206

    
1207
/**
1208
 * Length of a allocated file in bytes. Sparse files are counted by actual
1209
 * allocated space. Return < 0 if error or unknown.
1210
 */
1211
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1212
{
1213
    BlockDriver *drv = bs->drv;
1214
    if (!drv) {
1215
        return -ENOMEDIUM;
1216
    }
1217
    if (drv->bdrv_get_allocated_file_size) {
1218
        return drv->bdrv_get_allocated_file_size(bs);
1219
    }
1220
    if (bs->file) {
1221
        return bdrv_get_allocated_file_size(bs->file);
1222
    }
1223
    return -ENOTSUP;
1224
}
1225

    
1226
/**
1227
 * Length of a file in bytes. Return < 0 if error or unknown.
1228
 */
1229
int64_t bdrv_getlength(BlockDriverState *bs)
1230
{
1231
    BlockDriver *drv = bs->drv;
1232
    if (!drv)
1233
        return -ENOMEDIUM;
1234

    
1235
    if (bs->growable || bs->removable) {
1236
        if (drv->bdrv_getlength) {
1237
            return drv->bdrv_getlength(bs);
1238
        }
1239
    }
1240
    return bs->total_sectors * BDRV_SECTOR_SIZE;
1241
}
1242

    
1243
/* return 0 as number of sectors if no device present or error */
1244
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1245
{
1246
    int64_t length;
1247
    length = bdrv_getlength(bs);
1248
    if (length < 0)
1249
        length = 0;
1250
    else
1251
        length = length >> BDRV_SECTOR_BITS;
1252
    *nb_sectors_ptr = length;
1253
}
1254

    
1255
/* Packed layout of one partition table entry as read from disk. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
1267

    
1268
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1269
static int guess_disk_lchs(BlockDriverState *bs,
1270
                           int *pcylinders, int *pheads, int *psectors)
1271
{
1272
    uint8_t buf[BDRV_SECTOR_SIZE];
1273
    int ret, i, heads, sectors, cylinders;
1274
    struct partition *p;
1275
    uint32_t nr_sects;
1276
    uint64_t nb_sectors;
1277

    
1278
    bdrv_get_geometry(bs, &nb_sectors);
1279

    
1280
    ret = bdrv_read(bs, 0, buf, 1);
1281
    if (ret < 0)
1282
        return -1;
1283
    /* test msdos magic */
1284
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1285
        return -1;
1286
    for(i = 0; i < 4; i++) {
1287
        p = ((struct partition *)(buf + 0x1be)) + i;
1288
        nr_sects = le32_to_cpu(p->nr_sects);
1289
        if (nr_sects && p->end_head) {
1290
            /* We make the assumption that the partition terminates on
1291
               a cylinder boundary */
1292
            heads = p->end_head + 1;
1293
            sectors = p->end_sector & 63;
1294
            if (sectors == 0)
1295
                continue;
1296
            cylinders = nb_sectors / (heads * sectors);
1297
            if (cylinders < 1 || cylinders > 16383)
1298
                continue;
1299
            *pheads = heads;
1300
            *psectors = sectors;
1301
            *pcylinders = cylinders;
1302
#if 0
1303
            printf("guessed geometry: LCHS=%d %d %d\n",
1304
                   cylinders, heads, sectors);
1305
#endif
1306
            return 0;
1307
        }
1308
    }
1309
    return -1;
1310
}
1311

    
1312
/*
 * Determine the CHS geometry to use for 'bs' and store it in
 * *pcyls / *pheads / *psecs.
 *
 * 1. If a geometry hint is already set (cylinders != 0) it is returned
 *    unchanged.
 * 2. Otherwise the partition table is consulted via guess_disk_lchs().
 *    A guess with at most 16 heads is used as is, and an AUTO translation
 *    hint is switched to NONE to stay in sync with the logical geometry.
 * 3. If no guess is possible, or the guess has more than 16 heads (taken
 *    to mean a BIOS LBA translation was active), a standard 16 heads x 63
 *    sectors geometry is used with cylinders clamped to 2..16383. In the
 *    ">16 heads" case an AUTO translation hint becomes LARGE or LBA
 *    depending on cylinders * heads.
 *
 * In cases 2 and 3 the result is also stored as the new geometry hint.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected, guessed;
    int cylinders, heads, secs;
    uint64_t nb_sectors, std_cyls;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        return;
    }

    guessed = (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0);
    /* if heads > 16, it means that a BIOS LBA translation was active,
       so the default hardware geometry is OK */
    lba_detected = guessed && heads > 16;

    if (guessed && !lba_detected) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        /* disable any translation to be in sync with
           the logical geometry */
        if (translation == BIOS_ATA_TRANSLATION_AUTO) {
            bdrv_set_translation_hint(bs,
                                      BIOS_ATA_TRANSLATION_NONE);
        }
    } else {
        /* if no geometry, use a standard physical disk geometry */
        /* Clamp in 64 bits so a huge quotient cannot wrap when narrowed */
        std_cyls = nb_sectors / (16 * 63);
        if (std_cyls > 16383) {
            cylinders = 16383;
        } else if (std_cyls < 2) {
            cylinders = 2;
        } else {
            cylinders = std_cyls;
        }
        *pcyls = cylinders;
        *pheads = 16;
        *psecs = 63;
        if (lba_detected && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
            if ((*pcyls * *pheads) <= 131072) {
                bdrv_set_translation_hint(bs,
                                          BIOS_ATA_TRANSLATION_LARGE);
            } else {
                bdrv_set_translation_hint(bs,
                                          BIOS_ATA_TRANSLATION_LBA);
            }
        }
    }
    bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
}
1370

    
1371
/* Record the cylinders/heads/sectors geometry hint on the device state. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->secs = secs;
    bs->heads = heads;
    bs->cyls = cyls;
}
1378

    
1379
/* Record the translation hint on the device state. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1383

    
1384
/* Copy the stored cylinders/heads/sectors geometry hint to the outputs. */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *psecs = bs->secs;
    *pheads = bs->heads;
    *pcyls = bs->cyls;
}
1391

    
1392
/* Recognize floppy formats */
1393
typedef struct FDFormat {
1394
    FDriveType drive;
1395
    uint8_t last_sect;
1396
    uint8_t max_track;
1397
    uint8_t max_head;
1398
} FDFormat;
1399

    
1400
static const FDFormat fd_formats[] = {
1401
    /* First entry is default format */
1402
    /* 1.44 MB 3"1/2 floppy disks */
1403
    { FDRIVE_DRV_144, 18, 80, 1, },
1404
    { FDRIVE_DRV_144, 20, 80, 1, },
1405
    { FDRIVE_DRV_144, 21, 80, 1, },
1406
    { FDRIVE_DRV_144, 21, 82, 1, },
1407
    { FDRIVE_DRV_144, 21, 83, 1, },
1408
    { FDRIVE_DRV_144, 22, 80, 1, },
1409
    { FDRIVE_DRV_144, 23, 80, 1, },
1410
    { FDRIVE_DRV_144, 24, 80, 1, },
1411
    /* 2.88 MB 3"1/2 floppy disks */
1412
    { FDRIVE_DRV_288, 36, 80, 1, },
1413
    { FDRIVE_DRV_288, 39, 80, 1, },
1414
    { FDRIVE_DRV_288, 40, 80, 1, },
1415
    { FDRIVE_DRV_288, 44, 80, 1, },
1416
    { FDRIVE_DRV_288, 48, 80, 1, },
1417
    /* 720 kB 3"1/2 floppy disks */
1418
    { FDRIVE_DRV_144,  9, 80, 1, },
1419
    { FDRIVE_DRV_144, 10, 80, 1, },
1420
    { FDRIVE_DRV_144, 10, 82, 1, },
1421
    { FDRIVE_DRV_144, 10, 83, 1, },
1422
    { FDRIVE_DRV_144, 13, 80, 1, },
1423
    { FDRIVE_DRV_144, 14, 80, 1, },
1424
    /* 1.2 MB 5"1/4 floppy disks */
1425
    { FDRIVE_DRV_120, 15, 80, 1, },
1426
    { FDRIVE_DRV_120, 18, 80, 1, },
1427
    { FDRIVE_DRV_120, 18, 82, 1, },
1428
    { FDRIVE_DRV_120, 18, 83, 1, },
1429
    { FDRIVE_DRV_120, 20, 80, 1, },
1430
    /* 720 kB 5"1/4 floppy disks */
1431
    { FDRIVE_DRV_120,  9, 80, 1, },
1432
    { FDRIVE_DRV_120, 11, 80, 1, },
1433
    /* 360 kB 5"1/4 floppy disks */
1434
    { FDRIVE_DRV_120,  9, 40, 1, },
1435
    { FDRIVE_DRV_120,  9, 40, 0, },
1436
    { FDRIVE_DRV_120, 10, 41, 1, },
1437
    { FDRIVE_DRV_120, 10, 42, 1, },
1438
    /* 320 kB 5"1/4 floppy disks */
1439
    { FDRIVE_DRV_120,  8, 40, 1, },
1440
    { FDRIVE_DRV_120,  8, 40, 0, },
1441
    /* 360 kB must match 5"1/4 better than 3"1/2... */
1442
    { FDRIVE_DRV_144,  9, 80, 0, },
1443
    /* end */
1444
    { FDRIVE_DRV_NONE, -1, -1, 0, },
1445
};
1446

    
1447
/*
 * Work out the floppy geometry for 'bs'.
 *
 * If the stored geometry hint has all three values non-zero it is treated
 * as a user defined disk and returned as is; *drive is NOT written in that
 * case. NOTE(review): the outputs are passed to bdrv_get_geometry_hint() in
 * the order (nb_heads, max_track, last_sect), i.e. they receive the stored
 * cyls, heads and secs respectively - confirm this is intended.
 *
 * Otherwise fd_formats[] is searched among the entries compatible with
 * drive_in (every entry if drive_in is FDRIVE_DRV_NONE) for one whose total
 * sector count equals the image size. Without an exact match the first
 * compatible entry is used, and without any compatible entry index 1 is
 * used. NOTE(review): the table comment calls the first entry (index 0) the
 * default - confirm index 1 is intended.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i;
    int match = -1;
    int first_match = -1;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
        return;
    }

    bdrv_get_geometry(bs, &nb_sectors);
    for (i = 0; fd_formats[i].drive != FDRIVE_DRV_NONE; i++) {
        parse = &fd_formats[i];
        if (drive_in != parse->drive && drive_in != FDRIVE_DRV_NONE) {
            continue;
        }
        size = (parse->max_head + 1) * parse->max_track *
            parse->last_sect;
        if (nb_sectors == size) {
            match = i;
            break;
        }
        if (first_match == -1) {
            first_match = i;
        }
    }

    if (match == -1) {
        match = (first_match == -1) ? 1 : first_match;
    }
    parse = &fd_formats[match];

    *nb_heads = parse->max_head + 1;
    *max_track = parse->max_track;
    *last_sect = parse->last_sect;
    *drive = parse->drive;
}
1494

    
1495
/* Return the translation hint stored on the device state. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    int hint = bs->translation;

    return hint;
}
1499

    
1500
/* Store the actions to take on read errors and on write errors. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
1506

    
1507
/* Return the configured error action: read action if is_read, else write. */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
1511

    
1512
/*
 * Set the removable flag. If the device becomes removable while it is the
 * cached bs_snapshots device, that cache is reset to NULL.
 */
void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;

    if (!removable) {
        return;
    }
    if (bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
1519

    
1520
/* Return the device's removable flag. */
int bdrv_is_removable(BlockDriverState *bs)
{
    int removable = bs->removable;

    return removable;
}
1524

    
1525
/* Return the device's read_only flag. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    int read_only = bs->read_only;

    return read_only;
}
1529

    
1530
/* Return the device's sg flag. */
int bdrv_is_sg(BlockDriverState *bs)
{
    int sg = bs->sg;

    return sg;
}
1534

    
1535
/* Return the device's enable_write_cache flag (a getter, despite the name). */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    int enabled = bs->enable_write_cache;

    return enabled;
}
1539

    
1540
/* XXX: no longer used */
1541
void bdrv_set_change_cb(BlockDriverState *bs,
1542
                        void (*change_cb)(void *opaque, int reason),
1543
                        void *opaque)
1544
{
1545
    bs->change_cb = change_cb;
1546
    bs->change_opaque = opaque;
1547
}
1548

    
1549
/*
 * Return 1 if the backing device is encrypted, otherwise the device's own
 * encrypted flag.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted) {
        return 1;
    }
    return bs->encrypted;
}
1555

    
1556
/*
 * Return non-zero if an encryption key is still missing, either for the
 * backing device or for the device itself (encrypted without a valid key).
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted && !backing->valid_key) {
        return 1;
    }

    if (!bs->encrypted) {
        return 0;
    }
    return !bs->valid_key;
}
1564

    
1565
/*
 * Set the encryption key.
 *
 * An encrypted backing device gets the key first; if that fails the error
 * is returned, and if the device itself is not encrypted the call ends
 * there with 0. For the device itself: -EINVAL if it is not encrypted,
 * -ENOMEDIUM if there is no driver or no bdrv_set_key callback, otherwise
 * the driver's result. A failure clears valid_key; the first success sets
 * valid_key and media_changed and fires the change callback (CHANGE_MEDIA).
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing = bs->backing_hd;
    int ret;

    if (backing && backing->encrypted) {
        ret = bdrv_set_key(backing, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
        }
    }
    return ret;
}
1592

    
1593
/*
 * Copy the driver's format name into buf (at most buf_size bytes, via
 * pstrcpy); buf becomes the empty string when no driver is attached.
 */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (bs->drv) {
        pstrcpy(buf, buf_size, bs->drv->format_name);
        return;
    }
    buf[0] = '\0';
}
1601

    
1602
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1603
                         void *opaque)
1604
{
1605
    BlockDriver *drv;
1606

    
1607
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1608
        it(opaque, drv->format_name);
1609
    }
1610
}
1611

    
1612
BlockDriverState *bdrv_find(const char *name)
1613
{
1614
    BlockDriverState *bs;
1615

    
1616
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1617
        if (!strcmp(name, bs->device_name)) {
1618
            return bs;
1619
        }
1620
    }
1621
    return NULL;
1622
}
1623

    
1624
/*
 * Iterator over bdrv_states: pass NULL to get the first device, or a device
 * to get the one that follows it in the list.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (bs) {
        return QTAILQ_NEXT(bs, list);
    }
    return QTAILQ_FIRST(&bdrv_states);
}
1631

    
1632
/* Call it(opaque, bs) for every device in the bdrv_states list. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    QTAILQ_FOREACH(cur, &bdrv_states, list) {
        it(opaque, cur);
    }
}
1640

    
1641
/* Return the device's name as stored in bs->device_name. */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    const char *name = bs->device_name;

    return name;
}
1645

    
1646
/*
 * Flush the device.
 *
 * Returns 0 right away when BDRV_O_NO_FLUSH is set in open_flags. If the
 * driver has a bdrv_flush callback its result is returned. Otherwise 0 is
 * returned, see below.
 */
int bdrv_flush(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (!drv || !drv->bdrv_flush) {
        /*
         * Some block drivers always operate in either writethrough or
         * unsafe mode and therefore don't support bdrv_flush. Usually qemu
         * doesn't know how the server works (the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work
         * because that would break guests even if the server operates in
         * writethrough mode.
         *
         * So report success and rely on the user's configuration.
         */
        return 0;
    }

    return drv->bdrv_flush(bs);
}
1668

    
1669
void bdrv_flush_all(void)
1670
{
1671
    BlockDriverState *bs;
1672

    
1673
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1674
        if (bs->drv && !bdrv_is_read_only(bs) &&
1675
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1676
            bdrv_flush(bs);
1677
        }
1678
    }
1679
}
1680

    
1681
/*
 * Ask the driver's bdrv_has_zero_init callback; drivers without one are
 * reported as 1. A driver must be attached (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }

    return bs->drv->bdrv_has_zero_init(bs);
}
1691

    
1692
/*
 * Forward a discard request to the driver.
 * Returns -ENOMEDIUM without a driver, 0 when the driver has no
 * bdrv_discard callback, otherwise the callback's result.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_discard) {
        return drv->bdrv_discard(bs, sector_num, nb_sectors);
    }
    return 0;
}
1702

    
1703
/*
1704
 * Returns true iff the specified sector is present in the disk image. Drivers
1705
 * not implementing the functionality are assumed to not support backing files,
1706
 * hence all their sectors are reported as allocated.
1707
 *
1708
 * 'pnum' is set to the number of sectors (including and immediately following
1709
 * the specified sector) that are known to be in the same
1710
 * allocated/unallocated state.
1711
 *
1712
 * 'nb_sectors' is the max value 'pnum' should be set to.
1713
 */
1714
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1715
        int *pnum)
1716
{
1717
    int64_t n;
1718
    if (!bs->drv->bdrv_is_allocated) {
1719
        if (sector_num >= bs->total_sectors) {
1720
            *pnum = 0;
1721
            return 0;
1722
        }
1723
        n = bs->total_sectors - sector_num;
1724
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1725
        return 1;
1726
    }
1727
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1728
}
1729

    
1730
void bdrv_mon_event(const BlockDriverState *bdrv,
1731
                    BlockMonEventAction action, int is_read)
1732
{
1733
    QObject *data;
1734
    const char *action_str;
1735

    
1736
    switch (action) {
1737
    case BDRV_ACTION_REPORT:
1738
        action_str = "report";
1739
        break;
1740
    case BDRV_ACTION_IGNORE:
1741
        action_str = "ignore";
1742
        break;
1743
    case BDRV_ACTION_STOP:
1744
        action_str = "stop";
1745
        break;
1746
    default:
1747
        abort();
1748
    }
1749

    
1750
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1751
                              bdrv->device_name,
1752
                              action_str,
1753
                              is_read ? "read" : "write");
1754
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1755

    
1756
    qobject_decref(data);
1757
}
1758

    
1759
/*
 * qlist_iter() callback: print one device description dictionary on the
 * monitor passed as 'opaque'. The "locked" state is printed only for
 * removable devices, and the file details only when the dictionary has an
 * "inserted" entry.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    Monitor *mon = opaque;
    QDict *bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_bool(bs_dict, "removable"));

    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (!qdict_haskey(bs_dict, "inserted")) {
        monitor_printf(mon, " [not inserted]");
    } else {
        QDict *inserted = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(inserted, "file"));
        if (qdict_haskey(inserted, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon,
                                   qdict_get_str(inserted, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(inserted, "ro"),
                            qdict_get_str(inserted, "drv"),
                            qdict_get_bool(inserted, "encrypted"));
    }

    monitor_printf(mon, "\n");
}
1793

    
1794
/* Print every entry of the device list 'data' using bdrv_print_dict(). */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_print_dict, mon);
}
1798

    
1799
/*
 * Build a QList with one dictionary per device in bdrv_states and store it
 * in *ret_data. Each dictionary has 'device', 'type', 'removable' and
 * 'locked'; devices with a driver also get an 'inserted' dictionary
 * ('file', 'ro', 'drv', 'encrypted' and, if non-empty, 'backing_file').
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, bs->removable,
                                    bs->locked);

        if (bs->drv) {
            QDict *bs_dict = qobject_to_qdict(bs_obj);
            QObject *inserted;

            inserted = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                          "'encrypted': %i }",
                                          bs->filename, bs->read_only,
                                          bs->drv->format_name,
                                          bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                qdict_put(qobject_to_qdict(inserted), "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", inserted);
        }

        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
1836

    
1837
/*
 * qlist_iter() callback: print the device name followed by the four
 * counters found in its "stats" dictionary on the monitor in 'opaque'.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    Monitor *mon = opaque;
    QDict *entry = qobject_to_qdict(data);
    QDict *stats;

    monitor_printf(mon, "%s:", qdict_get_str(entry, "device"));

    stats = qobject_to_qdict(qdict_get(entry, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        "\n",
                        qdict_get_int(stats, "rd_bytes"),
                        qdict_get_int(stats, "wr_bytes"),
                        qdict_get_int(stats, "rd_operations"),
                        qdict_get_int(stats, "wr_operations"));
}
1856

    
1857
/* Print every entry of the statistics list 'data' via bdrv_stats_iter(). */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    QList *entries = qobject_to_qlist(data);

    qlist_iter(entries, bdrv_stats_iter, mon);
}
1861

    
1862
/*
 * Build the statistics dictionary for one device: a 'stats' dictionary with
 * the read/write byte and operation counters plus 'wr_highest_offset'
 * (wr_highest_sector scaled by BDRV_SECTOR_SIZE). 'device' is added when
 * the device has a non-empty name, and 'parent' holds the same structure
 * built recursively for bs->file when that is set.
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *stats_obj;
    QDict *stats_dict;

    stats_obj = qobject_from_jsonf("{ 'stats': {"
                                   "'rd_bytes': %" PRId64 ","
                                   "'wr_bytes': %" PRId64 ","
                                   "'rd_operations': %" PRId64 ","
                                   "'wr_operations': %" PRId64 ","
                                   "'wr_highest_offset': %" PRId64
                                   "} }",
                                   bs->rd_bytes, bs->wr_bytes,
                                   bs->rd_ops, bs->wr_ops,
                                   bs->wr_highest_sector *
                                   (uint64_t)BDRV_SECTOR_SIZE);
    stats_dict = qobject_to_qdict(stats_obj);

    if (*bs->device_name) {
        qdict_put(stats_dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        qdict_put_obj(stats_dict, "parent", bdrv_info_stats_bs(bs->file));
    }

    return stats_obj;
}
1891

    
1892
/*
 * Collect bdrv_info_stats_bs() results for every device in bdrv_states
 * into a QList and store it in *ret_data.
 */
void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QList *devices = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        qlist_append_obj(devices, bdrv_info_stats_bs(bs));
    }

    *ret_data = QOBJECT(devices);
}
1907

    
1908
/*
 * Return the name of the encrypted image: the backing file name when the
 * backing device is encrypted, else the device's own file name when it is
 * encrypted, else NULL.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }
    return NULL;
}
1917

    
1918
/*
 * Copy the backing file name into 'filename' (bounded by filename_size via
 * pstrcpy); an empty string is stored when bs->backing_file tests false.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    if (bs->backing_file) {
        pstrcpy(filename, filename_size, bs->backing_file);
        return;
    }
    pstrcpy(filename, filename_size, "");
}
1927

    
1928
/*
 * Write sectors through the driver's bdrv_write_compressed callback.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP when the driver lacks the
 * callback, -EIO when bdrv_check_request() rejects the range, otherwise the
 * driver's result. The dirty bitmap (if any) is updated before the write.
 */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_write_compressed == NULL) {
        return -ENOTSUP;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
1945

    
1946
/*
 * Fill *bdi through the driver's bdrv_get_info callback. The structure is
 * zeroed before the driver is called. Returns -ENOMEDIUM without a driver
 * and -ENOTSUP without the callback; *bdi is not touched in those cases.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_info == NULL) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
1956

    
1957
/*
 * Save VM state through the first layer, starting at @bs and following
 * bs->file, whose driver implements bdrv_save_vmstate.
 *
 * Returns -ENOMEDIUM if a visited layer has no driver, -ENOTSUP if the chain
 * ends without any layer providing the hook, else the hook's return value.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    do {
        BlockDriver *drv = bs->drv;

        if (!drv) {
            return -ENOMEDIUM;
        }
        if (drv->bdrv_save_vmstate) {
            return drv->bdrv_save_vmstate(bs, buf, pos, size);
        }
        bs = bs->file;
    } while (bs);

    return -ENOTSUP;
}
1969

    
1970
/*
 * Load VM state through the first layer, starting at @bs and following
 * bs->file, whose driver implements bdrv_load_vmstate.
 *
 * Returns -ENOMEDIUM if a visited layer has no driver, -ENOTSUP if the chain
 * ends without any layer providing the hook, else the hook's return value.
 */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    do {
        BlockDriver *drv = bs->drv;

        if (!drv) {
            return -ENOMEDIUM;
        }
        if (drv->bdrv_load_vmstate) {
            return drv->bdrv_load_vmstate(bs, buf, pos, size);
        }
        bs = bs->file;
    } while (bs);

    return -ENOTSUP;
}
1982

    
1983
/*
 * Deliver debug event @event to the driver of @bs.
 *
 * Does nothing when @bs has no driver or the driver does not implement the
 * bdrv_debug_event hook.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    /*
     * Plain call: a void function must not use "return <expression>;",
     * which ISO C forbids even when the expression itself has type void.
     */
    drv->bdrv_debug_event(bs, event);
}
1994

    
1995
/**************************************************************/
1996
/* handling of snapshots */
1997

    
1998
/*
 * Return 1 if snapshots can be taken on @bs, 0 otherwise.
 *
 * A device without a driver, a removable device or a read-only device never
 * qualifies.  If the driver lacks bdrv_snapshot_create, the answer is
 * delegated to bs->file when that exists.
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (drv->bdrv_snapshot_create) {
        return 1;
    }

    /* No native support: ask the underlying file layer, if any. */
    if (bs->file == NULL) {
        return 0;
    }
    return bdrv_can_snapshot(bs->file);
}
2014

    
2015
/* Return 1 if @bs was opened with BDRV_O_SNAPSHOT set, 0 otherwise. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) ? 1 : 0;
}
2019

    
2020
BlockDriverState *bdrv_snapshots(void)
2021
{
2022
    BlockDriverState *bs;
2023

    
2024
    if (bs_snapshots) {
2025
        return bs_snapshots;
2026
    }
2027

    
2028
    bs = NULL;
2029
    while ((bs = bdrv_next(bs))) {
2030
        if (bdrv_can_snapshot(bs)) {
2031
            bs_snapshots = bs;
2032
            return bs;
2033
        }
2034
    }
2035
    return NULL;
2036
}
2037

    
2038
int bdrv_snapshot_create(BlockDriverState *bs,
2039
                         QEMUSnapshotInfo *sn_info)
2040
{
2041
    BlockDriver *drv = bs->drv;
2042
    if (!drv)
2043
        return -ENOMEDIUM;
2044
    if (drv->bdrv_snapshot_create)
2045
        return drv->bdrv_snapshot_create(bs, sn_info);
2046
    if (bs->file)
2047
        return bdrv_snapshot_create(bs->file, sn_info);
2048
    return -ENOTSUP;
2049
}
2050

    
2051
/*
 * Revert @bs to the snapshot identified by @snapshot_id.
 *
 * If the driver implements bdrv_snapshot_goto, that hook does the work.
 * Otherwise, when bs->file exists, the driver is closed, the request is
 * forwarded to bs->file, and the driver is reopened with the original
 * open_flags.  If reopening fails, bs->file is deleted, bs->drv is cleared
 * and the reopen error is returned instead of the snapshot result.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP when neither the driver
 * hook nor bs->file is available.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int goto_ret, reopen_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }
    if (!bs->file) {
        return -ENOTSUP;
    }

    drv->bdrv_close(bs);
    goto_ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    reopen_ret = drv->bdrv_open(bs, bs->open_flags);
    if (reopen_ret < 0) {
        bdrv_delete(bs->file);
        bs->drv = NULL;
        return reopen_ret;
    }
    return goto_ret;
}
2076

    
2077
/*
 * Delete the snapshot @snapshot_id using the first layer, starting at @bs
 * and following bs->file, whose driver implements bdrv_snapshot_delete.
 *
 * Returns -ENOMEDIUM if a visited layer has no driver, -ENOTSUP if no layer
 * provides the hook, else the hook's return value.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    do {
        BlockDriver *drv = bs->drv;

        if (!drv) {
            return -ENOMEDIUM;
        }
        if (drv->bdrv_snapshot_delete) {
            return drv->bdrv_snapshot_delete(bs, snapshot_id);
        }
        bs = bs->file;
    } while (bs);

    return -ENOTSUP;
}
2088

    
2089
int bdrv_snapshot_list(BlockDriverState *bs,
2090
                       QEMUSnapshotInfo **psn_info)
2091
{
2092
    BlockDriver *drv = bs->drv;
2093
    if (!drv)
2094
        return -ENOMEDIUM;
2095
    if (drv->bdrv_snapshot_list)
2096
        return drv->bdrv_snapshot_list(bs, psn_info);
2097
    if (bs->file)
2098
        return bdrv_snapshot_list(bs->file, psn_info);
2099
    return -ENOTSUP;
2100
}
2101

    
2102
/*
 * Ask the driver of @bs to load snapshot @snapshot_name through its
 * bdrv_snapshot_load_tmp hook.
 *
 * Returns -ENOMEDIUM without a driver, -EINVAL if @bs is not read-only,
 * -ENOTSUP if the driver lacks the hook, else the hook's return value.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp == NULL) {
        return -ENOTSUP;
    }
    return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
2117

    
2118
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) as a short human-readable
 * string and return @buf.
 *
 * Values up to 999 are printed as a plain integer.  Larger values are scaled
 * by successive powers of 1024 and suffixed with K, M, G or T: one decimal
 * is shown while the scaled value is below 10, otherwise the value is
 * rounded to the nearest integer.  Anything beyond the T range is still
 * printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs every remaining magnitude. */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit), suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2147

    
2148
/*
 * Format one line of the snapshot table into @buf (capacity @buf_size) and
 * return @buf.
 *
 * With @sn == NULL the column header line is produced.  Otherwise the line
 * holds the snapshot id, name, VM state size (human readable), local date
 * derived from sn->date_sec, and the VM clock as hh:mm:ss.mmm derived from
 * sn->vm_clock_nsec.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_buf[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    ti = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&ti);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&ti, &tm);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    /* Whole seconds of VM clock; milliseconds are taken separately below. */
    secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_buf, sizeof(clock_buf),
             "%02d:%02d:%02d.%03d",
             (int)(secs / 3600),
             (int)((secs / 60) % 60),
             (int)(secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    get_human_readable_size(size_buf, sizeof(size_buf), sn->vm_state_size);
    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             size_buf,
             date_buf,
             clock_buf);
    return buf;
}
2190

    
2191

    
2192
/**************************************************************/
2193
/* async I/Os */
2194

    
2195
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2196
                                 QEMUIOVector *qiov, int nb_sectors,
2197
                                 BlockDriverCompletionFunc *cb, void *opaque)
2198
{
2199
    BlockDriver *drv = bs->drv;
2200
    BlockDriverAIOCB *ret;
2201

    
2202
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2203

    
2204
    if (!drv)
2205
        return NULL;
2206
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2207
        return NULL;
2208

    
2209
    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2210
                              cb, opaque);
2211

    
2212
    if (ret) {
2213
        /* Update stats even though technically transfer has not happened. */
2214
        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2215
        bs->rd_ops ++;
2216
    }
2217

    
2218
    return ret;
2219
}
2220

    
2221
typedef struct BlockCompleteData {
2222
    BlockDriverCompletionFunc *cb;
2223
    void *opaque;
2224
    BlockDriverState *bs;
2225
    int64_t sector_num;
2226
    int nb_sectors;
2227
} BlockCompleteData;
2228

    
2229
static void block_complete_cb(void *opaque, int ret)
2230
{
2231
    BlockCompleteData *b = opaque;
2232

    
2233
    if (b->bs->dirty_bitmap) {
2234
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2235
    }
2236
    b->cb(b->opaque, ret);
2237
    qemu_free(b);
2238
}
2239

    
2240
static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2241
                                             int64_t sector_num,
2242
                                             int nb_sectors,
2243
                                             BlockDriverCompletionFunc *cb,
2244
                                             void *opaque)
2245
{
2246
    BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
2247

    
2248
    blkdata->bs = bs;
2249
    blkdata->cb = cb;
2250
    blkdata->opaque = opaque;
2251
    blkdata->sector_num = sector_num;
2252
    blkdata->nb_sectors = nb_sectors;
2253

    
2254
    return blkdata;
2255
}
2256

    
2257
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2258
                                  QEMUIOVector *qiov, int nb_sectors,
2259
                                  BlockDriverCompletionFunc *cb, void *opaque)
2260
{
2261
    BlockDriver *drv = bs->drv;
2262
    BlockDriverAIOCB *ret;
2263
    BlockCompleteData *blk_cb_data;
2264

    
2265
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2266

    
2267
    if (!drv)
2268
        return NULL;
2269
    if (bs->read_only)
2270
        return NULL;
2271
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2272
        return NULL;
2273

    
2274
    if (bs->dirty_bitmap) {
2275
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2276
                                         opaque);
2277
        cb = &block_complete_cb;
2278
        opaque = blk_cb_data;
2279
    }
2280

    
2281
    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2282
                               cb, opaque);
2283

    
2284
    if (ret) {
2285
        /* Update stats even though technically transfer has not happened. */
2286
        bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2287
        bs->wr_ops ++;
2288
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2289
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2290
        }
2291
    }
2292

    
2293
    return ret;
2294
}
2295

    
2296

    
2297
typedef struct MultiwriteCB {
2298
    int error;
2299
    int num_requests;
2300
    int num_callbacks;
2301
    struct {
2302
        BlockDriverCompletionFunc *cb;
2303
        void *opaque;
2304
        QEMUIOVector *free_qiov;
2305
        void *free_buf;
2306
    } callbacks[];
2307
} MultiwriteCB;
2308

    
2309
/*
 * Invoke every original request callback with the overall result
 * mcb->error, then release the merged qiov and zero-fill buffer recorded
 * for that entry.
 */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int idx = 0;

    while (idx < mcb->num_callbacks) {
        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);

        if (mcb->callbacks[idx].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[idx].free_qiov);
        }
        qemu_free(mcb->callbacks[idx].free_qiov);
        qemu_vfree(mcb->callbacks[idx].free_buf);

        idx++;
    }
}
2322

    
2323
static void multiwrite_cb(void *opaque, int ret)
2324
{
2325
    MultiwriteCB *mcb = opaque;
2326

    
2327
    trace_multiwrite_cb(mcb, ret);
2328

    
2329
    if (ret < 0 && !mcb->error) {
2330
        mcb->error = ret;
2331
    }
2332

    
2333
    mcb->num_requests--;
2334
    if (mcb->num_requests == 0) {
2335
        multiwrite_user_cb(mcb);
2336
        qemu_free(mcb);
2337
    }
2338
}
2339

    
2340
static int multiwrite_req_compare(const void *a, const void *b)
2341
{
2342
    const BlockRequest *req1 = a, *req2 = b;
2343

    
2344
    /*
2345
     * Note that we can't simply subtract req2->sector from req1->sector
2346
     * here as that could overflow the return value.
2347
     */
2348
    if (req1->sector > req2->sector) {
2349
        return 1;
2350
    } else if (req1->sector < req2->sector) {
2351
        return -1;
2352
    } else {
2353
        return 0;
2354
    }
2355
}
2356

    
2357
/*
2358
 * Takes a bunch of requests and tries to merge them. Returns the number of
2359
 * requests that remain after merging.
2360
 */
2361
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2362
    int num_reqs, MultiwriteCB *mcb)
2363
{
2364
    int i, outidx;
2365

    
2366
    // Sort requests by start sector
2367
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2368

    
2369
    // Check if adjacent requests touch the same clusters. If so, combine them,
2370
    // filling up gaps with zero sectors.
2371
    outidx = 0;
2372
    for (i = 1; i < num_reqs; i++) {
2373
        int merge = 0;
2374
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2375

    
2376
        // This handles the cases that are valid for all block drivers, namely
2377
        // exactly sequential writes and overlapping writes.
2378
        if (reqs[i].sector <= oldreq_last) {
2379
            merge = 1;
2380
        }
2381

    
2382
        // The block driver may decide that it makes sense to combine requests
2383
        // even if there is a gap of some sectors between them. In this case,
2384
        // the gap is filled with zeros (therefore only applicable for yet
2385
        // unused space in format like qcow2).
2386
        if (!merge && bs->drv->bdrv_merge_requests) {
2387
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2388
        }
2389

    
2390
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2391
            merge = 0;
2392
        }
2393

    
2394
        if (merge) {
2395
            size_t size;
2396
            QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
2397
            qemu_iovec_init(qiov,
2398
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2399

    
2400
            // Add the first request to the merged one. If the requests are
2401
            // overlapping, drop the last sectors of the first request.
2402
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2403
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2404

    
2405
            // We might need to add some zeros between the two requests
2406
            if (reqs[i].sector > oldreq_last) {
2407
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2408
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2409
                memset(buf, 0, zero_bytes);
2410
                qemu_iovec_add(qiov, buf, zero_bytes);
2411
                mcb->callbacks[i].free_buf = buf;
2412
            }
2413

    
2414
            // Add the second request
2415
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2416

    
2417
            reqs[outidx].nb_sectors = qiov->size >> 9;
2418
            reqs[outidx].qiov = qiov;
2419

    
2420
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2421
        } else {
2422
            outidx++;
2423
            reqs[outidx].sector     = reqs[i].sector;
2424
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2425
            reqs[outidx].qiov       = reqs[i].qiov;
2426
        }
2427
    }
2428

    
2429
    return outidx + 1;
2430
}
2431

    
2432
/*
2433
 * Submit multiple AIO write requests at once.
2434
 *
2435
 * On success, the function returns 0 and all requests in the reqs array have
2436
 * been submitted. In error case this function returns -1, and any of the
2437
 * requests may or may not be submitted yet. In particular, this means that the
2438
 * callback will be called for some of the requests, for others it won't. The
2439
 * caller must check the error field of the BlockRequest to wait for the right
2440
 * callbacks (if error != 0, no callback will be called).
2441
 *
2442
 * The implementation may modify the contents of the reqs array, e.g. to merge
2443
 * requests. However, the fields opaque and error are left unmodified as they
2444
 * are used to signal failure for a single request to the caller.
2445
 */
2446
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2447
{
2448
    BlockDriverAIOCB *acb;
2449
    MultiwriteCB *mcb;
2450
    int i;
2451

    
2452
    /* don't submit writes if we don't have a medium */
2453
    if (bs->drv == NULL) {
2454
        for (i = 0; i < num_reqs; i++) {
2455
            reqs[i].error = -ENOMEDIUM;
2456
        }
2457
        return -1;
2458
    }
2459

    
2460
    if (num_reqs == 0) {
2461
        return 0;
2462
    }
2463

    
2464
    // Create MultiwriteCB structure
2465
    mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2466
    mcb->num_requests = 0;
2467
    mcb->num_callbacks = num_reqs;
2468

    
2469
    for (i = 0; i < num_reqs; i++) {
2470
        mcb->callbacks[i].cb = reqs[i].cb;
2471
        mcb->callbacks[i].opaque = reqs[i].opaque;
2472
    }
2473

    
2474
    // Check for mergable requests
2475
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2476

    
2477
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2478

    
2479
    /*
2480
     * Run the aio requests. As soon as one request can't be submitted
2481
     * successfully, fail all requests that are not yet submitted (we must
2482
     * return failure for all requests anyway)
2483
     *
2484
     * num_requests cannot be set to the right value immediately: If
2485
     * bdrv_aio_writev fails for some request, num_requests would be too high
2486
     * and therefore multiwrite_cb() would never recognize the multiwrite
2487
     * request as completed. We also cannot use the loop variable i to set it
2488
     * when the first request fails because the callback may already have been
2489
     * called for previously submitted requests. Thus, num_requests must be
2490
     * incremented for each request that is submitted.
2491
     *
2492
     * The problem that callbacks may be called early also means that we need
2493
     * to take care that num_requests doesn't become 0 before all requests are
2494
     * submitted - multiwrite_cb() would consider the multiwrite request
2495
     * completed. A dummy request that is "completed" by a manual call to
2496
     * multiwrite_cb() takes care of this.
2497
     */
2498
    mcb->num_requests = 1;
2499

    
2500
    // Run the aio requests
2501
    for (i = 0; i < num_reqs; i++) {
2502
        mcb->num_requests++;
2503
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2504
            reqs[i].nb_sectors, multiwrite_cb, mcb);
2505

    
2506
        if (acb == NULL) {
2507
            // We can only fail the whole thing if no request has been
2508
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2509
            // complete and report the error in the callback.
2510
            if (i == 0) {
2511
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2512
                goto fail;
2513
            } else {
2514
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2515
                multiwrite_cb(mcb, -EIO);
2516
                break;
2517
            }
2518
        }
2519
    }
2520

    
2521
    /* Complete the dummy request */
2522
    multiwrite_cb(mcb, 0);
2523

    
2524
    return 0;
2525

    
2526
fail:
2527
    for (i = 0; i < mcb->num_callbacks; i++) {
2528
        reqs[i].error = -EIO;
2529
    }
2530
    qemu_free(mcb);
2531
    return -1;
2532
}
2533

    
2534
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2535
        BlockDriverCompletionFunc *cb, void *opaque)
2536
{
2537
    BlockDriver *drv = bs->drv;
2538

    
2539
    trace_bdrv_aio_flush(bs, opaque);
2540

    
2541
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2542
        return bdrv_aio_noop_em(bs, cb, opaque);
2543
    }
2544

    
2545
    if (!drv)
2546
        return NULL;
2547
    return drv->bdrv_aio_flush(bs, cb, opaque);
2548
}
2549

    
2550
/* Cancel @acb by dispatching to the cancel hook of the pool it belongs to. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    AIOPool *pool = acb->pool;

    pool->cancel(acb);
}
2554

    
2555

    
2556
/**************************************************************/
2557
/* async block device emulation */
2558

    
2559
typedef struct BlockDriverAIOCBSync {
2560
    BlockDriverAIOCB common;
2561
    QEMUBH *bh;
2562
    int ret;
2563
    /* vector translation state */
2564
    QEMUIOVector *qiov;
2565
    uint8_t *bounce;
2566
    int is_write;
2567
} BlockDriverAIOCBSync;
2568

    
2569
/*
 * Cancel hook of bdrv_em_aio_pool.
 *
 * Deletes the pending completion bottom half, frees the bounce buffer and
 * returns the AIOCB to its pool.  The completion callback is not invoked.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);

    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    /*
     * The bounce buffer is normally freed by bdrv_aio_bh_cb(); since the
     * bottom half has just been deleted, free it here so a cancelled
     * request does not leak it.  bounce is NULL for flush/no-op requests,
     * which bdrv_aio_bh_cb() also passes to qemu_vfree().
     */
    qemu_vfree(acb->bounce);
    acb->bounce = NULL;

    qemu_aio_release(acb);
}
2577

    
2578
static AIOPool bdrv_em_aio_pool = {
2579
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2580
    .cancel             = bdrv_aio_cancel_em,
2581
};
2582

    
2583
static void bdrv_aio_bh_cb(void *opaque)
2584
{
2585
    BlockDriverAIOCBSync *acb = opaque;
2586

    
2587
    if (!acb->is_write)
2588
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2589
    qemu_vfree(acb->bounce);
2590
    acb->common.cb(acb->common.opaque, acb->ret);
2591
    qemu_bh_delete(acb->bh);
2592
    acb->bh = NULL;
2593
    qemu_aio_release(acb);
2594
}
2595

    
2596
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2597
                                            int64_t sector_num,
2598
                                            QEMUIOVector *qiov,
2599
                                            int nb_sectors,
2600
                                            BlockDriverCompletionFunc *cb,
2601
                                            void *opaque,
2602
                                            int is_write)
2603

    
2604
{
2605
    BlockDriverAIOCBSync *acb;
2606

    
2607
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2608
    acb->is_write = is_write;
2609
    acb->qiov = qiov;
2610
    acb->bounce = qemu_blockalign(bs, qiov->size);
2611

    
2612
    if (!acb->bh)
2613
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2614

    
2615
    if (is_write) {
2616
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2617
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2618
    } else {
2619
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2620
    }
2621

    
2622
    qemu_bh_schedule(acb->bh);
2623

    
2624
    return &acb->common;
2625
}
2626

    
2627
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2628
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2629
        BlockDriverCompletionFunc *cb, void *opaque)
2630
{
2631
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2632
}
2633

    
2634
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2635
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2636
        BlockDriverCompletionFunc *cb, void *opaque)
2637
{
2638
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2639
}
2640

    
2641

    
2642
typedef struct BlockDriverAIOCBCoroutine {
2643
    BlockDriverAIOCB common;
2644
    BlockRequest req;
2645
    bool is_write;
2646
    QEMUBH* bh;
2647
} BlockDriverAIOCBCoroutine;
2648

    
2649
/*
 * Cancel hook of bdrv_em_co_aio_pool.  @blockacb is not inspected; the
 * implementation simply calls qemu_aio_flush().
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
2653

    
2654
static AIOPool bdrv_em_co_aio_pool = {
2655
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
2656
    .cancel             = bdrv_aio_co_cancel_em,
2657
};
2658

    
2659
static void bdrv_co_rw_bh(void *opaque)
2660
{
2661
    BlockDriverAIOCBCoroutine *acb = opaque;
2662

    
2663
    acb->common.cb(acb->common.opaque, acb->req.error);
2664
    qemu_bh_delete(acb->bh);
2665
    qemu_aio_release(acb);
2666
}
2667

    
2668
/*
 * Coroutine entry point for emulated AIO: runs the driver's
 * bdrv_co_writev or bdrv_co_readv hook, stores the result in req.error and
 * schedules bdrv_co_rw_bh() to deliver the completion.
 */
static void coroutine_fn bdrv_co_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (acb->is_write) {
        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
    qemu_bh_schedule(acb->bh);
}
2684

    
2685
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2686
                                               int64_t sector_num,
2687
                                               QEMUIOVector *qiov,
2688
                                               int nb_sectors,
2689
                                               BlockDriverCompletionFunc *cb,
2690
                                               void *opaque,
2691
                                               bool is_write)
2692
{
2693
    Coroutine *co;
2694
    BlockDriverAIOCBCoroutine *acb;
2695

    
2696
    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2697
    acb->req.sector = sector_num;
2698
    acb->req.nb_sectors = nb_sectors;
2699
    acb->req.qiov = qiov;
2700
    acb->is_write = is_write;
2701

    
2702
    co = qemu_coroutine_create(bdrv_co_rw);
2703
    qemu_coroutine_enter(co, acb);
2704

    
2705
    return &acb->common;
2706
}
2707

    
2708
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2709
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2710
        BlockDriverCompletionFunc *cb, void *opaque)
2711
{
2712
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2713
                                 false);
2714
}
2715

    
2716
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2717
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2718
        BlockDriverCompletionFunc *cb, void *opaque)
2719
{
2720
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2721
                                 true);
2722
}
2723

    
2724
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2725
        BlockDriverCompletionFunc *cb, void *opaque)
2726
{
2727
    BlockDriverAIOCBSync *acb;
2728

    
2729
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2730
    acb->is_write = 1; /* don't bounce in the completion hadler */
2731
    acb->qiov = NULL;
2732
    acb->bounce = NULL;
2733
    acb->ret = 0;
2734

    
2735
    if (!acb->bh)
2736
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2737

    
2738
    bdrv_flush(bs);
2739
    qemu_bh_schedule(acb->bh);
2740
    return &acb->common;
2741
}
2742

    
2743
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2744
        BlockDriverCompletionFunc *cb, void *opaque)
2745
{
2746
    BlockDriverAIOCBSync *acb;
2747

    
2748
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2749
    acb->is_write = 1; /* don't bounce in the completion handler */
2750
    acb->qiov = NULL;
2751
    acb->bounce = NULL;
2752
    acb->ret = 0;
2753

    
2754
    if (!acb->bh) {
2755
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2756
    }
2757

    
2758
    qemu_bh_schedule(acb->bh);
2759
    return &acb->common;
2760
}
2761

    
2762
/**************************************************************/
2763
/* sync block device emulation */
2764

    
2765
/* Completion callback storing @ret into the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2769

    
2770
#define NOT_DONE 0x7fffffff
2771

    
2772
/*
 * Emulate a synchronous read with bdrv_aio_readv(): submit the request for
 * a single-element vector over @buf and call qemu_aio_wait() until the
 * completion callback has stored a result.
 *
 * Returns the result reported by the callback, or -1 if the request could
 * not be submitted.
 */
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *acb;

    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        return -1;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

    return async_ret;
}
2799

    
2800
/*
 * Emulate a synchronous write with bdrv_aio_writev(): submit the request for
 * a single-element vector over @buf and call qemu_aio_wait() until the
 * completion callback has stored a result.
 *
 * Returns the result reported by the callback, or -1 if the request could
 * not be submitted.
 */
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *acb;

    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        return -1;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

    return async_ret;
}
2825

    
2826
void bdrv_init(void)
2827
{
2828
    module_call_init(MODULE_INIT_BLOCK);
2829
}
2830

    
2831
void bdrv_init_with_whitelist(void)
2832
{
2833
    use_bdrv_whitelist = 1;
2834
    bdrv_init();
2835
}
2836

    
2837
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2838
                   BlockDriverCompletionFunc *cb, void *opaque)
2839
{
2840
    BlockDriverAIOCB *acb;
2841

    
2842
    if (pool->free_aiocb) {
2843
        acb = pool->free_aiocb;
2844
        pool->free_aiocb = acb->next;
2845
    } else {
2846
        acb = qemu_mallocz(pool->aiocb_size);
2847
        acb->pool = pool;
2848
    }
2849
    acb->bs = bs;
2850
    acb->cb = cb;
2851
    acb->opaque = opaque;
2852
    return acb;
2853
}
2854

    
2855
void qemu_aio_release(void *p)
2856
{
2857
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2858
    AIOPool *pool = acb->pool;
2859
    acb->next = pool->free_aiocb;
2860
    pool->free_aiocb = acb;
2861
}
2862

    
2863
/**************************************************************/
2864
/* Coroutine block device emulation */
2865

    
2866
typedef struct CoroutineIOCompletion {
2867
    Coroutine *coroutine;
2868
    int ret;
2869
} CoroutineIOCompletion;
2870

    
2871
static void bdrv_co_io_em_complete(void *opaque, int ret)
2872
{
2873
    CoroutineIOCompletion *co = opaque;
2874

    
2875
    co->ret = ret;
2876
    qemu_coroutine_enter(co->coroutine, NULL);
2877
}
2878

    
2879
/*
 * Issue a vectored read or write through the AIO interface from coroutine
 * context.
 *
 * The request is submitted with bdrv_aio_writev() (is_write) or
 * bdrv_aio_readv() using bdrv_co_io_em_complete() as the callback, then the
 * calling coroutine yields.  Once resumed, the result stored by the callback
 * is returned.  If submission yields no AIOCB, -EIO is returned without
 * yielding.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion completion;
    BlockDriverAIOCB *request;

    completion.coroutine = qemu_coroutine_self();
    completion.ret = 0;

    request = is_write
        ? bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                          bdrv_co_io_em_complete, &completion)
        : bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                         bdrv_co_io_em_complete, &completion);

    trace_bdrv_co_io(is_write, request);
    if (request == NULL) {
        return -EIO;
    }

    /* Resumed by bdrv_co_io_em_complete() when the request finishes. */
    qemu_coroutine_yield();

    return completion.ret;
}
2904

    
2905
/* Read variant of bdrv_co_io_em(): forwards with is_write == false. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = false;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2911

    
2912
/* Write variant of bdrv_co_io_em(): forwards with is_write == true. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    const bool is_write = true;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2918

    
2919
/**************************************************************/
2920
/* removable device support */
2921

    
2922
/**
2923
 * Return TRUE if the media is present
2924
 */
2925
int bdrv_is_inserted(BlockDriverState *bs)
2926
{
2927
    BlockDriver *drv = bs->drv;
2928
    int ret;
2929
    if (!drv)
2930
        return 0;
2931
    if (!drv->bdrv_is_inserted)
2932
        return !bs->tray_open;
2933
    ret = drv->bdrv_is_inserted(bs);
2934
    return ret;
2935
}
2936

    
2937
/**
2938
 * Return TRUE if the media changed since the last call to this
2939
 * function. It is currently only used for floppy disks
2940
 */
2941
int bdrv_media_changed(BlockDriverState *bs)
2942
{
2943
    BlockDriver *drv = bs->drv;
2944
    int ret;
2945

    
2946
    if (!drv || !drv->bdrv_media_changed)
2947
        ret = -ENOTSUP;
2948
    else
2949
        ret = drv->bdrv_media_changed(bs);
2950
    if (ret == -ENOTSUP)
2951
        ret = bs->media_changed;
2952
    bs->media_changed = 0;
2953
    return ret;
2954
}
2955

    
2956
/**
2957
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2958
 */
2959
int bdrv_eject(BlockDriverState *bs, int eject_flag)
2960
{
2961
    BlockDriver *drv = bs->drv;
2962

    
2963
    if (eject_flag && bs->locked) {
2964
        return -EBUSY;
2965
    }
2966

    
2967
    if (drv && drv->bdrv_eject) {
2968
        drv->bdrv_eject(bs, eject_flag);
2969
    }
2970
    bs->tray_open = eject_flag;
2971
    return 0;
2972
}
2973

    
2974
/* Return the lock flag stored in @bs (see bdrv_set_locked()). */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}
2978

    
2979
/**
2980
 * Lock or unlock the media (if it is locked, the user won't be able
2981
 * to eject it manually).
2982
 */
2983
void bdrv_set_locked(BlockDriverState *bs, int locked)
2984
{
2985
    BlockDriver *drv = bs->drv;
2986

    
2987
    trace_bdrv_set_locked(bs, locked);
2988

    
2989
    bs->locked = locked;
2990
    if (drv && drv->bdrv_set_locked) {
2991
        drv->bdrv_set_locked(bs, locked);
2992
    }
2993
}
2994

    
2995
/* needed for generic scsi interface */
2996

    
2997
/*
 * Forward an ioctl request to the driver of @bs.
 *
 * Returns the driver hook's result, or -ENOTSUP when there is no driver or
 * the driver has no bdrv_ioctl hook.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL || driver->bdrv_ioctl == NULL) {
        return -ENOTSUP;
    }
    return driver->bdrv_ioctl(bs, req, buf);
}
3005

    
3006
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3007
        unsigned long int req, void *buf,
3008
        BlockDriverCompletionFunc *cb, void *opaque)
3009
{
3010
    BlockDriver *drv = bs->drv;
3011

    
3012
    if (drv && drv->bdrv_aio_ioctl)
3013
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3014
    return NULL;
3015
}
3016

    
3017

    
3018

    
3019
/*
 * Allocate @size bytes through qemu_memalign().  The alignment is
 * bs->buffer_alignment when @bs is non-NULL and that field is non-zero,
 * and 512 otherwise.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    if (bs != NULL && bs->buffer_alignment) {
        return qemu_memalign(bs->buffer_alignment, size);
    }
    return qemu_memalign(512, size);
}
3023

    
3024
/*
 * Enable or disable dirty-sector tracking on @bs.
 *
 * bs->dirty_count is reset to 0 in either case.  When enabling, a bitmap
 * with one bit per dirty chunk (BDRV_SECTORS_PER_DIRTY_CHUNK sectors) is
 * allocated unless one already exists.  When disabling, an existing bitmap
 * is freed and the pointer cleared.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    const int64_t word_bytes = sizeof(unsigned long);
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Bytes needed for one bit per chunk, rounded up. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            /*
             * bdrv_get_dirty() reads the bitmap one unsigned long at a
             * time, so the allocation must cover whole words; otherwise
             * the access to the last word can run past the buffer.
             */
            bitmap_size = (bitmap_size + word_bytes - 1) / word_bytes *
                    word_bytes;

            bs->dirty_bitmap = qemu_mallocz(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            qemu_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
3044

    
3045
/*
 * Return 1 if the dirty chunk containing @sector is marked dirty, else 0.
 *
 * 0 is also returned when dirty tracking has no bitmap or when @sector lies
 * at or beyond the length reported by bdrv_getlength().
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (!bs->dirty_bitmap) {
        return 0;
    }
    if ((sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    /* Locate the word holding this chunk's bit, then test that bit. */
    return (bs->dirty_bitmap[chunk / bits_per_word] &
            (1UL << (chunk % bits_per_word))) != 0;
}
3057

    
3058
/*
 * Clear the dirty state for @nr_sectors sectors starting at @cur_sector by
 * calling set_dirty_bitmap() with a dirty value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    const int dirty = 0;

    set_dirty_bitmap(bs, cur_sector, nr_sectors, dirty);
}
3063

    
3064
/* Return the dirty counter stored in bs->dirty_count. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
3068

    
3069
/*
 * Update the in_use flag of @bs.  The new value must differ from the
 * current one; this is enforced with an assertion.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3074

    
3075
/* Return the in_use flag of @bs (see bdrv_set_in_use()). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3079

    
3080
int bdrv_img_create(const char *filename, const char *fmt,
3081
                    const char *base_filename, const char *base_fmt,
3082
                    char *options, uint64_t img_size, int flags)
3083
{
3084
    QEMUOptionParameter *param = NULL, *create_options = NULL;
3085
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
3086
    BlockDriverState *bs = NULL;
3087
    BlockDriver *drv, *proto_drv;
3088
    BlockDriver *backing_drv = NULL;
3089
    int ret = 0;
3090

    
3091
    /* Find driver and parse its options */
3092
    drv = bdrv_find_format(fmt);
3093
    if (!drv) {
3094
        error_report("Unknown file format '%s'", fmt);
3095
        ret = -EINVAL;
3096
        goto out;
3097
    }
3098

    
3099
    proto_drv = bdrv_find_protocol(filename);
3100
    if (!proto_drv) {
3101
        error_report("Unknown protocol '%s'", filename);
3102
        ret = -EINVAL;
3103
        goto out;
3104
    }
3105

    
3106
    create_options = append_option_parameters(create_options,
3107
                                              drv->create_options);
3108
    create_options = append_option_parameters(create_options,
3109
                                              proto_drv->create_options);
3110

    
3111
    /* Create parameter list with default values */
3112
    param = parse_option_parameters("", create_options, param);
3113

    
3114
    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3115

    
3116
    /* Parse -o options */
3117
    if (options) {
3118
        param = parse_option_parameters(options, create_options, param);
3119
        if (param == NULL) {
3120
            error_report("Invalid options for file format '%s'.", fmt);
3121
            ret = -EINVAL;
3122
            goto out;
3123
        }
3124
    }
3125

    
3126
    if (base_filename) {
3127
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3128
                                 base_filename)) {
3129
            error_report("Backing file not supported for file format '%s'",
3130
                         fmt);
3131
            ret = -EINVAL;
3132
            goto out;
3133
        }
3134
    }
3135

    
3136
    if (base_fmt) {
3137
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3138
            error_report("Backing file format not supported for file "
3139
                         "format '%s'", fmt);
3140
            ret = -EINVAL;
3141
            goto out;
3142
        }
3143
    }
3144

    
3145
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3146
    if (backing_file && backing_file->value.s) {
3147
        if (!strcmp(filename, backing_file->value.s)) {
3148
            error_report("Error: Trying to create an image with the "
3149
                         "same filename as the backing file");
3150
            ret = -EINVAL;
3151
            goto out;
3152
        }
3153
    }
3154

    
3155
    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3156
    if (backing_fmt && backing_fmt->value.s) {
3157
        backing_drv = bdrv_find_format(backing_fmt->value.s);
3158
        if (!backing_drv) {
3159
            error_report("Unknown backing file format '%s'",
3160
                         backing_fmt->value.s);
3161
            ret = -EINVAL;
3162
            goto out;
3163
        }
3164
    }
3165

    
3166
    // The size for the image must always be specified, with one exception:
3167
    // If we are using a backing file, we can obtain the size from there
3168
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
3169
    if (size && size->value.n == -1) {
3170
        if (backing_file && backing_file->value.s) {
3171
            uint64_t size;
3172
            char buf[32];
3173

    
3174
            bs = bdrv_new("");
3175

    
3176
            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3177
            if (ret < 0) {
3178
                error_report("Could not open '%s'", backing_file->value.s);
3179
                goto out;
3180
            }
3181
            bdrv_get_geometry(bs, &size);
3182
            size *= 512;
3183

    
3184
            snprintf(buf, sizeof(buf), "%" PRId64, size);
3185
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3186
        } else {
3187
            error_report("Image creation needs a size parameter");
3188
            ret = -EINVAL;
3189
            goto out;
3190
        }
3191
    }
3192

    
3193
    printf("Formatting '%s', fmt=%s ", filename, fmt);
3194
    print_option_parameters(param);
3195
    puts("");
3196

    
3197
    ret = bdrv_create(drv, filename, param);
3198

    
3199
    if (ret < 0) {
3200
        if (ret == -ENOTSUP) {
3201
            error_report("Formatting or formatting option not supported for "
3202
                         "file format '%s'", fmt);
3203
        } else if (ret == -EFBIG) {
3204
            error_report("The image size is too large for file format '%s'",
3205
                         fmt);
3206
        } else {
3207
            error_report("%s: error while creating %s: %s", filename, fmt,
3208
                         strerror(-ret));
3209
        }
3210
    }
3211

    
3212
out:
3213
    free_option_parameters(create_options);
3214
    free_option_parameters(param);
3215

    
3216
    if (bs) {
3217
        bdrv_delete(bs);
3218
    }
3219

    
3220
    return ret;
3221
}