Statistics
| Branch: | Revision:

root / block.c @ 4a1d5e1f

History | View | Annotate | Download (81.2 kB)

1
/*
2
 * QEMU System Emulator block driver
3
 *
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "config-host.h"
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "qemu-objects.h"
31

    
32
#ifdef CONFIG_BSD
33
#include <sys/types.h>
34
#include <sys/stat.h>
35
#include <sys/ioctl.h>
36
#include <sys/queue.h>
37
#ifndef __DragonFly__
38
#include <sys/disk.h>
39
#endif
40
#endif
41

    
42
#ifdef _WIN32
43
#include <windows.h>
44
#endif
45

    
46
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
47
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
48
        BlockDriverCompletionFunc *cb, void *opaque);
49
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
50
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
51
        BlockDriverCompletionFunc *cb, void *opaque);
52
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
53
        BlockDriverCompletionFunc *cb, void *opaque);
54
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
55
        BlockDriverCompletionFunc *cb, void *opaque);
56
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
57
                        uint8_t *buf, int nb_sectors);
58
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
59
                         const uint8_t *buf, int nb_sectors);
60

    
61
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
62
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
63

    
64
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
65
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
66

    
67
/* The device to use for VM snapshots */
68
static BlockDriverState *bs_snapshots;
69

    
70
/* If non-zero, use only whitelisted block drivers */
71
static int use_bdrv_whitelist;
72

    
73
#ifdef _WIN32
74
/*
 * Return non-zero when filename begins with a drive specifier, i.e. an
 * ASCII letter immediately followed by ':' (for example "c:" or "D:/dir").
 * The second character is only inspected when the first one is a letter.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_letter;

    is_letter = (first >= 'a' && first <= 'z') ||
                (first >= 'A' && first <= 'Z');
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
80

    
81
/*
 * Return 1 when filename names a drive: either a bare drive specifier
 * (letter, ':' and nothing else) or a name starting with one of the two
 * device prefixes tested below.  Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive;
    int device_path;

    bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';
    if (bare_drive) {
        return 1;
    }

    device_path = strstart(filename, "\\\\.\\", NULL) ||
                  strstart(filename, "//./", NULL);
    return device_path ? 1 : 0;
}
91
#endif
92

    
93
/*
 * Check whether the path starts with "<protocol>:".
 *
 * Any path containing a ':' is reported as having a protocol.  On Windows,
 * drive names and drive-letter prefixes are ruled out first so that their
 * ':' is not mistaken for a protocol separator.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
105

    
106
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped; the
 * remainder is absolute when it begins with '/'.  On Windows a backslash is
 * accepted as well, and a path whose very first character is a slash or a
 * backslash is absolute regardless of any ':' further on.
 */
int path_is_absolute(const char *path)
{
    const char *rest;

#ifdef _WIN32
    /* covers device-style names that begin with a separator */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    rest = strchr(path, ':');
    rest = rest ? rest + 1 : path;

#ifdef _WIN32
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
125

    
126
/*
 * Build in dest (a buffer of dest_size bytes) the location of filename.
 *
 * If filename is absolute it is simply copied.  Otherwise it is taken to be
 * relative to base_path: the part of base_path up to and including its last
 * directory separator (or its "<protocol>:" prefix, whichever ends later) is
 * kept and filename is appended to it.  URLs are supported.
 *
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after the "<protocol>:" part, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');

        if (!after_sep || bslash > after_sep) {
            after_sep = bslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever of the two prefixes is longer */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    /* copy the prefix, truncated so that the terminator still fits */
    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
169

    
170
/*
 * Add a driver to the head of the list of known block drivers, filling in
 * emulated entry points for the operations it does not provide itself:
 *  - no bdrv_aio_readv: the AIO read/write emulation is installed;
 *  - bdrv_aio_readv but no bdrv_read: the synchronous read/write emulation
 *    is installed;
 *  - no bdrv_aio_flush: the AIO flush emulation is installed.
 */
void bdrv_register(BlockDriver *bdrv)
{
    if (bdrv->bdrv_aio_readv) {
        if (!bdrv->bdrv_read) {
            /* AIO-only driver: add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    } else {
        /* no native AIO: add AIO emulation layer */
        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
    }

    if (!bdrv->bdrv_aio_flush) {
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
187

    
188
/* create a new block device (by default it is empty) */
189
BlockDriverState *bdrv_new(const char *device_name)
190
{
191
    BlockDriverState *bs;
192

    
193
    bs = qemu_mallocz(sizeof(BlockDriverState));
194
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
195
    if (device_name[0] != '\0') {
196
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
197
    }
198
    return bs;
199
}
200

    
201
BlockDriver *bdrv_find_format(const char *format_name)
202
{
203
    BlockDriver *drv1;
204
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
205
        if (!strcmp(drv1->format_name, format_name)) {
206
            return drv1;
207
        }
208
    }
209
    return NULL;
210
}
211

    
212
/*
 * Return 1 if drv may be used according to the build-time whitelist
 * CONFIG_BDRV_WHITELIST, 0 otherwise.  The list is scanned up to its first
 * NULL entry; a list whose first entry is NULL allows every driver.
 */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    if (whitelist[0] == NULL) {
        return 1;               /* no whitelist, anything goes */
    }

    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }

    return 0;
}
229

    
230
/*
 * Like bdrv_find_format(), but a driver that is not whitelisted is
 * reported as not found (NULL).
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv == NULL || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
235

    
236
int bdrv_create(BlockDriver *drv, const char* filename,
237
    QEMUOptionParameter *options)
238
{
239
    if (!drv->bdrv_create)
240
        return -ENOTSUP;
241

    
242
    return drv->bdrv_create(filename, options);
243
}
244

    
245
/*
 * Create a file using the protocol driver that bdrv_find_protocol() selects
 * for filename.  Returns -ENOENT when no protocol driver matches, otherwise
 * the result of bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto_drv = bdrv_find_protocol(filename);

    return proto_drv ? bdrv_create(proto_drv, filename, options) : -ENOENT;
}
256

    
257
#ifdef _WIN32
258
void get_tmp_filename(char *filename, int size)
259
{
260
    char temp_dir[MAX_PATH];
261

    
262
    GetTempPath(MAX_PATH, temp_dir);
263
    GetTempFileName(temp_dir, "qem", 0, filename);
264
}
265
#else
266
/*
 * Create a temporary file and store its name in filename, a buffer of
 * 'size' bytes.
 *
 * The name has the form "<dir>/vl.XXXXXX", where <dir> is $TMPDIR or "/tmp"
 * when TMPDIR is unset; mkstemp() fills in the XXXXXX part and creates the
 * file.  The descriptor is closed again, so only the name is handed back.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    /* XXX: race condition possible (the file is reopened by name later) */
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        /* only close a descriptor that mkstemp() actually returned */
        close(fd);
    }
}
278
#endif
279

    
280
/*
281
 * Detect host devices. By convention, /dev/cdrom[N] is always
282
 * recognized as a host CDROM.
283
 */
284
static BlockDriver *find_hdev_driver(const char *filename)
285
{
286
    int score_max = 0, score;
287
    BlockDriver *drv = NULL, *d;
288

    
289
    QLIST_FOREACH(d, &bdrv_drivers, list) {
290
        if (d->bdrv_probe_device) {
291
            score = d->bdrv_probe_device(filename);
292
            if (score > score_max) {
293
                score_max = score;
294
                drv = d;
295
            }
296
        }
297
    }
298

    
299
    return drv;
300
}
301

    
302
BlockDriver *bdrv_find_protocol(const char *filename)
303
{
304
    BlockDriver *drv1;
305
    char protocol[128];
306
    int len;
307
    const char *p;
308

    
309
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
310

    
311
    /*
312
     * XXX(hch): we really should not let host device detection
313
     * override an explicit protocol specification, but moving this
314
     * later breaks access to device names with colons in them.
315
     * Thanks to the brain-dead persistent naming schemes on udev-
316
     * based Linux systems those actually are quite common.
317
     */
318
    drv1 = find_hdev_driver(filename);
319
    if (drv1) {
320
        return drv1;
321
    }
322

    
323
    if (!path_has_protocol(filename)) {
324
        return bdrv_find_format("file");
325
    }
326
    p = strchr(filename, ':');
327
    assert(p != NULL);
328
    len = p - filename;
329
    if (len > sizeof(protocol) - 1)
330
        len = sizeof(protocol) - 1;
331
    memcpy(protocol, filename, len);
332
    protocol[len] = '\0';
333
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
334
        if (drv1->protocol_name &&
335
            !strcmp(drv1->protocol_name, protocol)) {
336
            return drv1;
337
        }
338
    }
339
    return NULL;
340
}
341

    
342
/*
 * Guess the image format of filename by probing its first bytes.
 *
 * The file is opened through its protocol, up to 2048 bytes are read from
 * offset 0 and handed to every driver that has a bdrv_probe callback; the
 * driver with the highest strictly positive score wins.  scsi-generic
 * devices and empty drives are not probed and get the "raw" driver.
 *
 * On return *pdrv holds the chosen driver or NULL.  The return value is
 * negative on failure (-ENOENT when no driver was found); on success it is
 * whatever the last successful step returned, so it may be positive.
 */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    BlockDriverState *bs;
    BlockDriver *best = NULL;
    BlockDriver *cur;
    uint8_t buf[2048];
    int best_score = 0;
    int ret;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        best = bdrv_find_format("raw");
        *pdrv = best;
        return best ? ret : -ENOENT;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* ret is passed to the probe callbacks as the buffer size */
    QLIST_FOREACH(cur, &bdrv_drivers, list) {
        int score;

        if (!cur->bdrv_probe) {
            continue;
        }
        score = cur->bdrv_probe(buf, ret, filename);
        if (score > best_score) {
            best_score = score;
            best = cur;
        }
    }

    *pdrv = best;
    return best ? ret : -ENOENT;
}
390

    
391
/**
392
 * Set the current 'total_sectors' value
393
 */
394
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
395
{
396
    BlockDriver *drv = bs->drv;
397

    
398
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
399
    if (bs->sg)
400
        return 0;
401

    
402
    /* query actual device if possible, otherwise just trust the hint */
403
    if (drv->bdrv_getlength) {
404
        int64_t length = drv->bdrv_getlength(bs);
405
        if (length < 0) {
406
            return length;
407
        }
408
        hint = length >> BDRV_SECTOR_BITS;
409
    }
410

    
411
    bs->total_sectors = hint;
412
    return 0;
413
}
414

    
415
/*
416
 * Common part for opening disk images and files
417
 */
418
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
419
    int flags, BlockDriver *drv)
420
{
421
    int ret, open_flags;
422

    
423
    assert(drv != NULL);
424

    
425
    bs->file = NULL;
426
    bs->total_sectors = 0;
427
    bs->encrypted = 0;
428
    bs->valid_key = 0;
429
    bs->open_flags = flags;
430
    /* buffer_alignment defaulted to 512, drivers can change this value */
431
    bs->buffer_alignment = 512;
432

    
433
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
434

    
435
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
436
        return -ENOTSUP;
437
    }
438

    
439
    bs->drv = drv;
440
    bs->opaque = qemu_mallocz(drv->instance_size);
441

    
442
    if (flags & BDRV_O_CACHE_WB)
443
        bs->enable_write_cache = 1;
444

    
445
    /*
446
     * Clear flags that are internal to the block layer before opening the
447
     * image.
448
     */
449
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
450

    
451
    /*
452
     * Snapshots should be writable.
453
     */
454
    if (bs->is_temporary) {
455
        open_flags |= BDRV_O_RDWR;
456
    }
457

    
458
    /* Open the image, either directly or using a protocol */
459
    if (drv->bdrv_file_open) {
460
        ret = drv->bdrv_file_open(bs, filename, open_flags);
461
    } else {
462
        ret = bdrv_file_open(&bs->file, filename, open_flags);
463
        if (ret >= 0) {
464
            ret = drv->bdrv_open(bs, open_flags);
465
        }
466
    }
467

    
468
    if (ret < 0) {
469
        goto free_and_fail;
470
    }
471

    
472
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
473

    
474
    ret = refresh_total_sectors(bs, bs->total_sectors);
475
    if (ret < 0) {
476
        goto free_and_fail;
477
    }
478

    
479
#ifndef _WIN32
480
    if (bs->is_temporary) {
481
        unlink(filename);
482
    }
483
#endif
484
    return 0;
485

    
486
free_and_fail:
487
    if (bs->file) {
488
        bdrv_delete(bs->file);
489
        bs->file = NULL;
490
    }
491
    qemu_free(bs->opaque);
492
    bs->opaque = NULL;
493
    bs->drv = NULL;
494
    return ret;
495
}
496

    
497
/*
498
 * Opens a file using a protocol (file, host_device, nbd, ...)
499
 */
500
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
501
{
502
    BlockDriverState *bs;
503
    BlockDriver *drv;
504
    int ret;
505

    
506
    drv = bdrv_find_protocol(filename);
507
    if (!drv) {
508
        return -ENOENT;
509
    }
510

    
511
    bs = bdrv_new("");
512
    ret = bdrv_open_common(bs, filename, flags, drv);
513
    if (ret < 0) {
514
        bdrv_delete(bs);
515
        return ret;
516
    }
517
    bs->growable = 1;
518
    *pbs = bs;
519
    return 0;
520
}
521

    
522
/*
523
 * Opens a disk image (raw, qcow2, vmdk, ...)
524
 */
525
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
526
              BlockDriver *drv)
527
{
528
    int ret;
529

    
530
    if (flags & BDRV_O_SNAPSHOT) {
531
        BlockDriverState *bs1;
532
        int64_t total_size;
533
        int is_protocol = 0;
534
        BlockDriver *bdrv_qcow2;
535
        QEMUOptionParameter *options;
536
        char tmp_filename[PATH_MAX];
537
        char backing_filename[PATH_MAX];
538

    
539
        /* if snapshot, we create a temporary backing file and open it
540
           instead of opening 'filename' directly */
541

    
542
        /* if there is a backing file, use it */
543
        bs1 = bdrv_new("");
544
        ret = bdrv_open(bs1, filename, 0, drv);
545
        if (ret < 0) {
546
            bdrv_delete(bs1);
547
            return ret;
548
        }
549
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
550

    
551
        if (bs1->drv && bs1->drv->protocol_name)
552
            is_protocol = 1;
553

    
554
        bdrv_delete(bs1);
555

    
556
        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
557

    
558
        /* Real path is meaningless for protocols */
559
        if (is_protocol)
560
            snprintf(backing_filename, sizeof(backing_filename),
561
                     "%s", filename);
562
        else if (!realpath(filename, backing_filename))
563
            return -errno;
564

    
565
        bdrv_qcow2 = bdrv_find_format("qcow2");
566
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
567

    
568
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
569
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
570
        if (drv) {
571
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
572
                drv->format_name);
573
        }
574

    
575
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
576
        free_option_parameters(options);
577
        if (ret < 0) {
578
            return ret;
579
        }
580

    
581
        filename = tmp_filename;
582
        drv = bdrv_qcow2;
583
        bs->is_temporary = 1;
584
    }
585

    
586
    /* Find the right image format driver */
587
    if (!drv) {
588
        ret = find_image_format(filename, &drv);
589
    }
590

    
591
    if (!drv) {
592
        goto unlink_and_fail;
593
    }
594

    
595
    /* Open the image */
596
    ret = bdrv_open_common(bs, filename, flags, drv);
597
    if (ret < 0) {
598
        goto unlink_and_fail;
599
    }
600

    
601
    /* If there is a backing file, use it */
602
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
603
        char backing_filename[PATH_MAX];
604
        int back_flags;
605
        BlockDriver *back_drv = NULL;
606

    
607
        bs->backing_hd = bdrv_new("");
608

    
609
        if (path_has_protocol(bs->backing_file)) {
610
            pstrcpy(backing_filename, sizeof(backing_filename),
611
                    bs->backing_file);
612
        } else {
613
            path_combine(backing_filename, sizeof(backing_filename),
614
                         filename, bs->backing_file);
615
        }
616

    
617
        if (bs->backing_format[0] != '\0') {
618
            back_drv = bdrv_find_format(bs->backing_format);
619
        }
620

    
621
        /* backing files always opened read-only */
622
        back_flags =
623
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
624

    
625
        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
626
        if (ret < 0) {
627
            bdrv_close(bs);
628
            return ret;
629
        }
630
        if (bs->is_temporary) {
631
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
632
        } else {
633
            /* base image inherits from "parent" */
634
            bs->backing_hd->keep_read_only = bs->keep_read_only;
635
        }
636
    }
637

    
638
    if (!bdrv_key_required(bs)) {
639
        /* call the change callback */
640
        bs->media_changed = 1;
641
        if (bs->change_cb)
642
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
643
    }
644

    
645
    return 0;
646

    
647
unlink_and_fail:
648
    if (bs->is_temporary) {
649
        unlink(filename);
650
    }
651
    return ret;
652
}
653

    
654
/*
 * Close the image attached to bs; does nothing when no driver is attached.
 *
 * The backing device is deleted, the driver's close callback is run and the
 * driver's private state is freed.  The underlying bs->file is closed but
 * not deleted here.  The change callback, if set, is invoked last.
 */
void bdrv_close(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
    if (bs->backing_hd) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
    }

    bs->drv->bdrv_close(bs);
    qemu_free(bs->opaque);
#ifdef _WIN32
    /* on Windows the temporary image is removed at close time */
    if (bs->is_temporary) {
        unlink(bs->filename);
    }
#endif
    bs->opaque = NULL;
    bs->drv = NULL;

    if (bs->file != NULL) {
        bdrv_close(bs->file);
    }

    /* call the change callback */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}
684

    
685
void bdrv_close_all(void)
686
{
687
    BlockDriverState *bs;
688

    
689
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
690
        bdrv_close(bs);
691
    }
692
}
693

    
694
/* make a BlockDriverState anonymous by removing from bdrv_state list.
695
   Also, NULL terminate the device_name to prevent double remove */
696
void bdrv_make_anon(BlockDriverState *bs)
697
{
698
    if (bs->device_name[0] != '\0') {
699
        QTAILQ_REMOVE(&bdrv_states, bs, list);
700
    }
701
    bs->device_name[0] = '\0';
702
}
703

    
704
/*
 * Destroy a block device: take it off the device list, close it, delete
 * its underlying bs->file (if any) and free the state itself.
 * The device must not have a peer attached.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->peer == NULL);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file) {
        bdrv_delete(bs->file);
    }

    /* bdrv_close() is expected to have dropped the snapshot reference */
    assert(bs_snapshots != bs);
    qemu_free(bs);
}
719

    
720
/*
 * Record qdev as the peer of bs.
 * Returns 0 on success, or -EBUSY when bs already has a peer.
 */
int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (!bs->peer) {
        bs->peer = qdev;
        return 0;
    }

    return -EBUSY;
}
728

    
729
/*
 * Clear the peer of bs.  qdev must be the currently attached peer; this is
 * enforced with an assertion.
 */
void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(qdev == bs->peer);
    bs->peer = NULL;
}
734

    
735
/* Return the peer currently recorded for bs (NULL when none is attached). */
DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    DeviceState *peer = bs->peer;

    return peer;
}
739

    
740
/*
741
 * Run consistency checks on an image
742
 *
743
 * Returns 0 if the check could be completed (it doesn't mean that the image is
744
 * free of errors) or -errno when an internal error occurred. The results of the
745
 * check are stored in res.
746
 */
747
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
748
{
749
    if (bs->drv->bdrv_check == NULL) {
750
        return -ENOTSUP;
751
    }
752

    
753
    memset(res, 0, sizeof(*res));
754
    return bs->drv->bdrv_check(bs, res);
755
}
756

    
757
#define COMMIT_BUF_SECTORS 2048
758

    
759
/* commit COW file into the raw image */
760
int bdrv_commit(BlockDriverState *bs)
761
{
762
    BlockDriver *drv = bs->drv;
763
    BlockDriver *backing_drv;
764
    int64_t sector, total_sectors;
765
    int n, ro, open_flags;
766
    int ret = 0, rw_ret = 0;
767
    uint8_t *buf;
768
    char filename[1024];
769
    BlockDriverState *bs_rw, *bs_ro;
770

    
771
    if (!drv)
772
        return -ENOMEDIUM;
773
    
774
    if (!bs->backing_hd) {
775
        return -ENOTSUP;
776
    }
777

    
778
    if (bs->backing_hd->keep_read_only) {
779
        return -EACCES;
780
    }
781

    
782
    backing_drv = bs->backing_hd->drv;
783
    ro = bs->backing_hd->read_only;
784
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
785
    open_flags =  bs->backing_hd->open_flags;
786

    
787
    if (ro) {
788
        /* re-open as RW */
789
        bdrv_delete(bs->backing_hd);
790
        bs->backing_hd = NULL;
791
        bs_rw = bdrv_new("");
792
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
793
            backing_drv);
794
        if (rw_ret < 0) {
795
            bdrv_delete(bs_rw);
796
            /* try to re-open read-only */
797
            bs_ro = bdrv_new("");
798
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
799
                backing_drv);
800
            if (ret < 0) {
801
                bdrv_delete(bs_ro);
802
                /* drive not functional anymore */
803
                bs->drv = NULL;
804
                return ret;
805
            }
806
            bs->backing_hd = bs_ro;
807
            return rw_ret;
808
        }
809
        bs->backing_hd = bs_rw;
810
    }
811

    
812
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
813
    buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
814

    
815
    for (sector = 0; sector < total_sectors; sector += n) {
816
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
817

    
818
            if (bdrv_read(bs, sector, buf, n) != 0) {
819
                ret = -EIO;
820
                goto ro_cleanup;
821
            }
822

    
823
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
824
                ret = -EIO;
825
                goto ro_cleanup;
826
            }
827
        }
828
    }
829

    
830
    if (drv->bdrv_make_empty) {
831
        ret = drv->bdrv_make_empty(bs);
832
        bdrv_flush(bs);
833
    }
834

    
835
    /*
836
     * Make sure all data we wrote to the backing device is actually
837
     * stable on disk.
838
     */
839
    if (bs->backing_hd)
840
        bdrv_flush(bs->backing_hd);
841

    
842
ro_cleanup:
843
    qemu_free(buf);
844

    
845
    if (ro) {
846
        /* re-open as RO */
847
        bdrv_delete(bs->backing_hd);
848
        bs->backing_hd = NULL;
849
        bs_ro = bdrv_new("");
850
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
851
            backing_drv);
852
        if (ret < 0) {
853
            bdrv_delete(bs_ro);
854
            /* drive not functional anymore */
855
            bs->drv = NULL;
856
            return ret;
857
        }
858
        bs->backing_hd = bs_ro;
859
        bs->backing_hd->keep_read_only = 0;
860
    }
861

    
862
    return ret;
863
}
864

    
865
void bdrv_commit_all(void)
866
{
867
    BlockDriverState *bs;
868

    
869
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
870
        bdrv_commit(bs);
871
    }
872
}
873

    
874
/*
875
 * Return values:
876
 * 0        - success
877
 * -EINVAL  - backing format specified, but no file
878
 * -ENOSPC  - can't update the backing file because no space is left in the
879
 *            image file header
880
 * -ENOTSUP - format driver doesn't support changing the backing file
881
 */
882
int bdrv_change_backing_file(BlockDriverState *bs,
883
    const char *backing_file, const char *backing_fmt)
884
{
885
    BlockDriver *drv = bs->drv;
886

    
887
    if (drv->bdrv_change_backing_file != NULL) {
888
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
889
    } else {
890
        return -ENOTSUP;
891
    }
892
}
893

    
894
/*
 * Validate a byte-granular request against the device.
 *
 * Returns -ENOMEDIUM when no medium is inserted, 0 without further checks
 * for growable devices, -EIO when the range [offset, offset + size) is not
 * entirely inside the current device length, and 0 otherwise.
 */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bs->growable) {
        return 0;
    }

    len = bdrv_getlength(bs);

    /* evaluated left to right, so len - offset is only formed when
       0 <= offset <= len */
    if (offset < 0 || offset > len || len - offset < size) {
        return -EIO;
    }

    return 0;
}
915

    
916
/*
 * Sector-granular wrapper around bdrv_check_byte_request(): both the start
 * sector and the sector count are scaled by BDRV_SECTOR_SIZE first.
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    int64_t offset = sector_num * BDRV_SECTOR_SIZE;
    size_t size = nb_sectors * BDRV_SECTOR_SIZE;

    return bdrv_check_byte_request(bs, offset, size);
}
922

    
923
/* return < 0 if error. See bdrv_write() for the return codes */
924
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
925
              uint8_t *buf, int nb_sectors)
926
{
927
    BlockDriver *drv = bs->drv;
928

    
929
    if (!drv)
930
        return -ENOMEDIUM;
931
    if (bdrv_check_request(bs, sector_num, nb_sectors))
932
        return -EIO;
933

    
934
    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
935
}
936

    
937
/*
 * Mark (dirty != 0) or clear (dirty == 0) the dirty bit of every chunk
 * touched by the sector range [sector_num, sector_num + nb_sectors).
 *
 * One bit of bs->dirty_bitmap covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors.
 * bs->dirty_count is adjusted only for bits that actually change state.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk, last_chunk;

    chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    last_chunk = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    while (chunk <= last_chunk) {
        unsigned long idx = chunk / bits_per_word;
        unsigned long bit = chunk % bits_per_word;
        unsigned long mask = 1UL << bit;
        unsigned long word = bs->dirty_bitmap[idx];
        int is_set = (word & mask) != 0;

        if (dirty && !is_set) {
            bs->dirty_count++;
            word |= mask;
        } else if (!dirty && is_set) {
            bs->dirty_count--;
            word &= ~mask;
        }
        bs->dirty_bitmap[idx] = word;

        chunk++;
    }
}
964

    
965
/* Return < 0 if error. Important errors are:
966
  -EIO         generic I/O error (may happen for all errors)
967
  -ENOMEDIUM   No media inserted.
968
  -EINVAL      Invalid sector number or nb_sectors
969
  -EACCES      Trying to write a read-only device
970
*/
971
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
972
               const uint8_t *buf, int nb_sectors)
973
{
974
    BlockDriver *drv = bs->drv;
975
    if (!bs->drv)
976
        return -ENOMEDIUM;
977
    if (bs->read_only)
978
        return -EACCES;
979
    if (bdrv_check_request(bs, sector_num, nb_sectors))
980
        return -EIO;
981

    
982
    if (bs->dirty_bitmap) {
983
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
984
    }
985

    
986
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
987
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
988
    }
989

    
990
    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
991
}
992

    
993
/*
 * Read count1 bytes starting at byte 'offset' into buf, without requiring
 * sector alignment.
 *
 * The request is split into up to three parts: a partial leading sector
 * (read through a bounce buffer), whole sectors read straight into the
 * destination, and a partial trailing sector (bounce buffer again).
 *
 * Returns count1 on success, or the negative value returned by bdrv_read().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    /* byte pointer, because arithmetic on void * is not ISO C */
    uint8_t *dst = buf;
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count) {
        len = count;
    }
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0) {
            return count1;
        }
        sector_num++;
        dst += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        ret = bdrv_read(bs, sector_num, dst, nb_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        dst += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, tmp_buf, count);
    }
    return count1;
}
1037

    
1038
/*
 * Write 'count1' bytes from 'buf' starting at byte position 'offset'.
 *
 * The request does not have to be sector aligned: a partial first and/or
 * last sector is handled with a read-modify-write cycle through a bounce
 * buffer, while the whole sectors in between are written directly from the
 * caller's buffer.
 *
 * Returns 'count1' on success, or the negative value returned by the
 * bdrv_read()/bdrv_write() call that failed.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    const uint8_t *src = buf;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int remaining = count1;
    int chunk, whole, ret;

    /* unaligned head: patch the bytes into the existing sector contents */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce + (offset & (BDRV_SECTOR_SIZE - 1)), src, chunk);
        ret = bdrv_write(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        src += chunk;
    }

    /* aligned middle: whole sectors are written straight from 'buf' */
    whole = remaining >> BDRV_SECTOR_BITS;
    if (whole > 0) {
        ret = bdrv_write(bs, sector, src, whole);
        if (ret < 0) {
            return ret;
        }
        sector += whole;
        chunk = whole << BDRV_SECTOR_BITS;
        src += chunk;
        remaining -= chunk;
    }

    /* unaligned tail: overwrite only the leading bytes of the last sector */
    if (remaining > 0) {
        ret = bdrv_read(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(bounce, src, remaining);
        ret = bdrv_write(bs, sector, bounce, 1);
        if (ret < 0) {
            return ret;
        }
    }
    return count1;
}
1086

    
1087
/*
1088
 * Writes to the file and ensures that no writes are reordered across this
1089
 * request (acts as a barrier)
1090
 *
1091
 * Returns 0 on success, -errno in error cases.
1092
 */
1093
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1094
    const void *buf, int count)
1095
{
1096
    int ret;
1097

    
1098
    ret = bdrv_pwrite(bs, offset, buf, count);
1099
    if (ret < 0) {
1100
        return ret;
1101
    }
1102

    
1103
    /* No flush needed for cache=writethrough, it uses O_DSYNC */
1104
    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1105
        bdrv_flush(bs);
1106
    }
1107

    
1108
    return 0;
1109
}
1110

    
1111
/*
1112
 * Writes to the file and ensures that no writes are reordered across this
1113
 * request (acts as a barrier)
1114
 *
1115
 * Returns 0 on success, -errno in error cases.
1116
 */
1117
int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
1118
    const uint8_t *buf, int nb_sectors)
1119
{
1120
    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
1121
        buf, BDRV_SECTOR_SIZE * nb_sectors);
1122
}
1123

    
1124
/**
1125
 * Truncate file to 'offset' bytes (needed only for file protocols)
1126
 */
1127
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1128
{
1129
    BlockDriver *drv = bs->drv;
1130
    int ret;
1131
    if (!drv)
1132
        return -ENOMEDIUM;
1133
    if (!drv->bdrv_truncate)
1134
        return -ENOTSUP;
1135
    if (bs->read_only)
1136
        return -EACCES;
1137
    if (bdrv_in_use(bs))
1138
        return -EBUSY;
1139
    ret = drv->bdrv_truncate(bs, offset);
1140
    if (ret == 0) {
1141
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1142
        if (bs->change_cb) {
1143
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1144
        }
1145
    }
1146
    return ret;
1147
}
1148

    
1149
/**
1150
 * Length of a allocated file in bytes. Sparse files are counted by actual
1151
 * allocated space. Return < 0 if error or unknown.
1152
 */
1153
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1154
{
1155
    BlockDriver *drv = bs->drv;
1156
    if (!drv) {
1157
        return -ENOMEDIUM;
1158
    }
1159
    if (drv->bdrv_get_allocated_file_size) {
1160
        return drv->bdrv_get_allocated_file_size(bs);
1161
    }
1162
    if (bs->file) {
1163
        return bdrv_get_allocated_file_size(bs->file);
1164
    }
1165
    return -ENOTSUP;
1166
}
1167

    
1168
/**
1169
 * Length of a file in bytes. Return < 0 if error or unknown.
1170
 */
1171
int64_t bdrv_getlength(BlockDriverState *bs)
1172
{
1173
    BlockDriver *drv = bs->drv;
1174
    if (!drv)
1175
        return -ENOMEDIUM;
1176

    
1177
    if (bs->growable || bs->removable) {
1178
        if (drv->bdrv_getlength) {
1179
            return drv->bdrv_getlength(bs);
1180
        }
1181
    }
1182
    return bs->total_sectors * BDRV_SECTOR_SIZE;
1183
}
1184

    
1185
/* return 0 as number of sectors if no device present or error */
1186
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1187
{
1188
    int64_t length;
1189
    length = bdrv_getlength(bs);
1190
    if (length < 0)
1191
        length = 0;
1192
    else
1193
        length = length >> BDRV_SECTOR_BITS;
1194
    *nb_sectors_ptr = length;
1195
}
1196

    
1197
/*
 * One entry of the MSDOS partition table, laid out exactly as stored on
 * disk (packed, 16 bytes). The 32-bit fields are converted with
 * le32_to_cpu() by the code that reads them.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* what partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
1209

    
1210
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1211
static int guess_disk_lchs(BlockDriverState *bs,
1212
                           int *pcylinders, int *pheads, int *psectors)
1213
{
1214
    uint8_t buf[BDRV_SECTOR_SIZE];
1215
    int ret, i, heads, sectors, cylinders;
1216
    struct partition *p;
1217
    uint32_t nr_sects;
1218
    uint64_t nb_sectors;
1219

    
1220
    bdrv_get_geometry(bs, &nb_sectors);
1221

    
1222
    ret = bdrv_read(bs, 0, buf, 1);
1223
    if (ret < 0)
1224
        return -1;
1225
    /* test msdos magic */
1226
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1227
        return -1;
1228
    for(i = 0; i < 4; i++) {
1229
        p = ((struct partition *)(buf + 0x1be)) + i;
1230
        nr_sects = le32_to_cpu(p->nr_sects);
1231
        if (nr_sects && p->end_head) {
1232
            /* We make the assumption that the partition terminates on
1233
               a cylinder boundary */
1234
            heads = p->end_head + 1;
1235
            sectors = p->end_sector & 63;
1236
            if (sectors == 0)
1237
                continue;
1238
            cylinders = nb_sectors / (heads * sectors);
1239
            if (cylinders < 1 || cylinders > 16383)
1240
                continue;
1241
            *pheads = heads;
1242
            *psectors = sectors;
1243
            *pcylinders = cylinders;
1244
#if 0
1245
            printf("guessed geometry: LCHS=%d %d %d\n",
1246
                   cylinders, heads, sectors);
1247
#endif
1248
            return 0;
1249
        }
1250
    }
1251
    return -1;
1252
}
1253

    
1254
/*
 * Determine the cylinders/heads/sectors geometry to use for 'bs'.
 *
 * Order of preference:
 *  1. an existing geometry hint (non-zero cylinders) is returned as is;
 *  2. a geometry guessed from the partition table with at most 16 heads;
 *     translation is then switched from AUTO to NONE;
 *  3. a standard 16 heads / 63 sectors geometry with the cylinder count
 *     clamped to 2..16383. If the partition table showed more than 16 heads
 *     (a BIOS LBA translation was active) and translation is AUTO, LARGE or
 *     LBA translation is selected.
 *
 * In cases 2 and 3 the result is stored as the new geometry hint.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int cylinders, heads, secs;
    int translation;
    int guessed;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);

    /* if a geometry hint is available, use it */
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        return;
    }

    guessed = (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0);

    if (guessed && heads <= 16) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        /* disable any translation to be in sync with
           the logical geometry */
        if (translation == BIOS_ATA_TRANSLATION_AUTO) {
            bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_NONE);
        }
    } else {
        /* if no geometry, use a standard physical disk geometry */
        cylinders = nb_sectors / (16 * 63);
        if (cylinders > 16383) {
            cylinders = 16383;
        } else if (cylinders < 2) {
            cylinders = 2;
        }
        *pcyls = cylinders;
        *pheads = 16;
        *psecs = 63;

        /* Reaching this branch with a successful guess means heads > 16,
           i.e. a BIOS LBA translation was active, so the default hardware
           geometry is OK and only the translation mode needs choosing */
        if (guessed && translation == BIOS_ATA_TRANSLATION_AUTO) {
            if ((*pcyls * *pheads) <= 131072) {
                bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_LARGE);
            } else {
                bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_LBA);
            }
        }
    }
    bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
}
1312

    
1313
/* Record the cylinders/heads/sectors geometry hint in 'bs'. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->secs = secs;
    bs->heads = heads;
    bs->cyls = cyls;
}
1320

    
1321
/* Record the translation hint (a BIOS_ATA_TRANSLATION_* value) in 'bs'. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1325

    
1326
/* Copy the geometry hint stored in 'bs' into *pcyls, *pheads and *psecs. */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *psecs = bs->secs;
    *pheads = bs->heads;
    *pcyls = bs->cyls;
}
1333

    
1334
/* Recognize floppy formats */
1335
typedef struct FDFormat {
1336
    FDriveType drive;
1337
    uint8_t last_sect;
1338
    uint8_t max_track;
1339
    uint8_t max_head;
1340
} FDFormat;
1341

    
1342
static const FDFormat fd_formats[] = {
1343
    /* First entry is default format */
1344
    /* 1.44 MB 3"1/2 floppy disks */
1345
    { FDRIVE_DRV_144, 18, 80, 1, },
1346
    { FDRIVE_DRV_144, 20, 80, 1, },
1347
    { FDRIVE_DRV_144, 21, 80, 1, },
1348
    { FDRIVE_DRV_144, 21, 82, 1, },
1349
    { FDRIVE_DRV_144, 21, 83, 1, },
1350
    { FDRIVE_DRV_144, 22, 80, 1, },
1351
    { FDRIVE_DRV_144, 23, 80, 1, },
1352
    { FDRIVE_DRV_144, 24, 80, 1, },
1353
    /* 2.88 MB 3"1/2 floppy disks */
1354
    { FDRIVE_DRV_288, 36, 80, 1, },
1355
    { FDRIVE_DRV_288, 39, 80, 1, },
1356
    { FDRIVE_DRV_288, 40, 80, 1, },
1357
    { FDRIVE_DRV_288, 44, 80, 1, },
1358
    { FDRIVE_DRV_288, 48, 80, 1, },
1359
    /* 720 kB 3"1/2 floppy disks */
1360
    { FDRIVE_DRV_144,  9, 80, 1, },
1361
    { FDRIVE_DRV_144, 10, 80, 1, },
1362
    { FDRIVE_DRV_144, 10, 82, 1, },
1363
    { FDRIVE_DRV_144, 10, 83, 1, },
1364
    { FDRIVE_DRV_144, 13, 80, 1, },
1365
    { FDRIVE_DRV_144, 14, 80, 1, },
1366
    /* 1.2 MB 5"1/4 floppy disks */
1367
    { FDRIVE_DRV_120, 15, 80, 1, },
1368
    { FDRIVE_DRV_120, 18, 80, 1, },
1369
    { FDRIVE_DRV_120, 18, 82, 1, },
1370
    { FDRIVE_DRV_120, 18, 83, 1, },
1371
    { FDRIVE_DRV_120, 20, 80, 1, },
1372
    /* 720 kB 5"1/4 floppy disks */
1373
    { FDRIVE_DRV_120,  9, 80, 1, },
1374
    { FDRIVE_DRV_120, 11, 80, 1, },
1375
    /* 360 kB 5"1/4 floppy disks */
1376
    { FDRIVE_DRV_120,  9, 40, 1, },
1377
    { FDRIVE_DRV_120,  9, 40, 0, },
1378
    { FDRIVE_DRV_120, 10, 41, 1, },
1379
    { FDRIVE_DRV_120, 10, 42, 1, },
1380
    /* 320 kB 5"1/4 floppy disks */
1381
    { FDRIVE_DRV_120,  8, 40, 1, },
1382
    { FDRIVE_DRV_120,  8, 40, 0, },
1383
    /* 360 kB must match 5"1/4 better than 3"1/2... */
1384
    { FDRIVE_DRV_144,  9, 80, 0, },
1385
    /* end */
1386
    { FDRIVE_DRV_NONE, -1, -1, 0, },
1387
};
1388

    
1389
/*
 * Determine the floppy geometry (heads, tracks, sectors per track) for 'bs'.
 *
 * A complete user supplied geometry hint (all three values non-zero) is
 * returned unchanged. Otherwise fd_formats[] is searched for an entry that
 * is compatible with 'drive_in' (FDRIVE_DRV_NONE accepts every entry) and
 * whose total sector count equals the image size. Without an exact size
 * match the first compatible entry is used.
 *
 * NOTE(review): '*drive' is only written when the table is consulted; with a
 * user defined geometry it is left untouched - confirm callers initialise it.
 * NOTE(review): when no entry is compatible with 'drive_in' the fallback is
 * index 1, although the table comment calls the first entry the default -
 * kept as is, confirm intent.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *fmt;
    uint64_t nb_sectors, size;
    int i;
    int exact = -1;
    int first_compatible = -1;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
        return;
    }

    bdrv_get_geometry(bs, &nb_sectors);

    for (i = 0; fd_formats[i].drive != FDRIVE_DRV_NONE; i++) {
        fmt = &fd_formats[i];
        if (drive_in != fmt->drive && drive_in != FDRIVE_DRV_NONE) {
            continue;
        }
        size = (fmt->max_head + 1) * fmt->max_track * fmt->last_sect;
        if (nb_sectors == size) {
            exact = i;
            break;
        }
        if (first_compatible == -1) {
            first_compatible = i;
        }
    }

    if (exact == -1) {
        exact = (first_compatible == -1) ? 1 : first_compatible;
    }
    fmt = &fd_formats[exact];

    *nb_heads = fmt->max_head + 1;
    *max_track = fmt->max_track;
    *last_sect = fmt->last_sect;
    *drive = fmt->drive;
}
1436

    
1437
/* Return the translation hint currently stored in 'bs'. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
1441

    
1442
/* Store the actions to take on read errors and on write errors for 'bs'. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
1448

    
1449
/*
 * Return the configured error action of 'bs': the read error action if
 * 'is_read' is non-zero, the write error action otherwise.
 */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
1453

    
1454
/*
 * Set the removable flag of 'bs'.
 *
 * If the device becomes removable while it is the cached snapshot device
 * (bs_snapshots), the cache is dropped, since bdrv_can_snapshot() rejects
 * removable devices.
 */
void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;

    if (!removable) {
        return;
    }
    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }
}
1461

    
1462
/* Return the removable flag of 'bs'. */
int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}
1466

    
1467
/* Return the read_only flag of 'bs'. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
1471

    
1472
/* Return the sg flag of 'bs'. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
1476

    
1477
/* Return the enable_write_cache flag of 'bs'. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
1481

    
1482
/* XXX: no longer used */
1483
void bdrv_set_change_cb(BlockDriverState *bs,
1484
                        void (*change_cb)(void *opaque, int reason),
1485
                        void *opaque)
1486
{
1487
    bs->change_cb = change_cb;
1488
    bs->change_opaque = opaque;
1489
}
1490

    
1491
/*
 * Return 1 if the backing file of 'bs' is encrypted, otherwise the
 * encrypted flag of 'bs' itself.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing && backing->encrypted) {
        return 1;
    }
    return bs->encrypted;
}
1497

    
1498
/*
 * Return non-zero if a key still has to be supplied: either the backing
 * file or 'bs' itself is encrypted and has no valid key yet.
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;
    int backing_needs_key = backing && backing->encrypted &&
                            !backing->valid_key;

    if (backing_needs_key) {
        return 1;
    }
    return bs->encrypted && !bs->valid_key;
}
1506

    
1507
/*
 * Set the encryption key of 'bs'.
 *
 * An encrypted backing file receives the key first; if that fails the error
 * is returned, and if 'bs' itself is not encrypted the call ends there with
 * 0. For 'bs' itself -EINVAL is returned when it is not encrypted and
 * -ENOMEDIUM when there is no driver or the driver cannot take a key.
 *
 * Otherwise the driver's result is returned. On failure valid_key is
 * cleared; on the first success valid_key and media_changed are set and the
 * change callback, if any, is invoked with CHANGE_MEDIA.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing = bs->backing_hd;
    int ret;

    if (backing && backing->encrypted) {
        ret = bdrv_set_key(backing, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
        }
    }
    return ret;
}
1534

    
1535
/*
 * Copy the format name of the driver of 'bs' into 'buf' (at most 'buf_size'
 * bytes, via pstrcpy). Without a driver 'buf' becomes the empty string.
 */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (bs->drv) {
        pstrcpy(buf, buf_size, bs->drv->format_name);
        return;
    }
    buf[0] = '\0';
}
1543

    
1544
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1545
                         void *opaque)
1546
{
1547
    BlockDriver *drv;
1548

    
1549
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1550
        it(opaque, drv->format_name);
1551
    }
1552
}
1553

    
1554
BlockDriverState *bdrv_find(const char *name)
1555
{
1556
    BlockDriverState *bs;
1557

    
1558
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1559
        if (!strcmp(name, bs->device_name)) {
1560
            return bs;
1561
        }
1562
    }
1563
    return NULL;
1564
}
1565

    
1566
/*
 * Iterate over bdrv_states: with bs == NULL the first entry is returned,
 * otherwise the entry that follows 'bs'.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    return bs ? QTAILQ_NEXT(bs, list) : QTAILQ_FIRST(&bdrv_states);
}
1573

    
1574
/* Call 'it' with 'opaque' for every block device on bdrv_states. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    for (cur = QTAILQ_FIRST(&bdrv_states); cur != NULL;
         cur = QTAILQ_NEXT(cur, list)) {
        it(opaque, cur);
    }
}
1582

    
1583
/* Return the device name stored in 'bs' (not a copy). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1587

    
1588
/*
 * Flush 'bs' through its driver.
 *
 * Returns 0 without doing anything if the image was opened with
 * BDRV_O_NO_FLUSH, or if there is no driver or no bdrv_flush callback;
 * otherwise the driver's result is returned.
 */
int bdrv_flush(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    if (!drv || !drv->bdrv_flush) {
        return 0;
    }

    return drv->bdrv_flush(bs);
}
1610

    
1611
void bdrv_flush_all(void)
1612
{
1613
    BlockDriverState *bs;
1614

    
1615
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1616
        if (bs->drv && !bdrv_is_read_only(bs) &&
1617
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1618
            bdrv_flush(bs);
1619
        }
1620
    }
1621
}
1622

    
1623
/*
 * Return the driver's answer to bdrv_has_zero_init, or 1 if the driver does
 * not implement the callback. 'bs' must have a driver (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }
    return bs->drv->bdrv_has_zero_init(bs);
}
1633

    
1634
/*
 * Pass a discard request for 'nb_sectors' sectors starting at 'sector_num'
 * to the driver.
 *
 * Returns -ENOMEDIUM without a driver, 0 if the driver has no discard
 * callback, otherwise the driver's result.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_discard) {
        return drv->bdrv_discard(bs, sector_num, nb_sectors);
    }
    return 0;
}
1644

    
1645
/*
1646
 * Returns true iff the specified sector is present in the disk image. Drivers
1647
 * not implementing the functionality are assumed to not support backing files,
1648
 * hence all their sectors are reported as allocated.
1649
 *
1650
 * 'pnum' is set to the number of sectors (including and immediately following
1651
 * the specified sector) that are known to be in the same
1652
 * allocated/unallocated state.
1653
 *
1654
 * 'nb_sectors' is the max value 'pnum' should be set to.
1655
 */
1656
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1657
        int *pnum)
1658
{
1659
    int64_t n;
1660
    if (!bs->drv->bdrv_is_allocated) {
1661
        if (sector_num >= bs->total_sectors) {
1662
            *pnum = 0;
1663
            return 0;
1664
        }
1665
        n = bs->total_sectors - sector_num;
1666
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1667
        return 1;
1668
    }
1669
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1670
}
1671

    
1672
void bdrv_mon_event(const BlockDriverState *bdrv,
1673
                    BlockMonEventAction action, int is_read)
1674
{
1675
    QObject *data;
1676
    const char *action_str;
1677

    
1678
    switch (action) {
1679
    case BDRV_ACTION_REPORT:
1680
        action_str = "report";
1681
        break;
1682
    case BDRV_ACTION_IGNORE:
1683
        action_str = "ignore";
1684
        break;
1685
    case BDRV_ACTION_STOP:
1686
        action_str = "stop";
1687
        break;
1688
    default:
1689
        abort();
1690
    }
1691

    
1692
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1693
                              bdrv->device_name,
1694
                              action_str,
1695
                              is_read ? "read" : "write");
1696
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1697

    
1698
    qobject_decref(data);
1699
}
1700

    
1701
/*
 * qlist_iter() callback: print one entry of the list built by bdrv_info()
 * to the monitor passed in 'opaque'.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    Monitor *mon = opaque;
    QDict *bs_dict = qobject_to_qdict(obj);
    int removable = qdict_get_bool(bs_dict, "removable");

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"), removable);

    if (removable) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (!qdict_haskey(bs_dict, "inserted")) {
        monitor_printf(mon, " [not inserted]");
    } else {
        QDict *inserted = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(inserted, "file"));
        if (qdict_haskey(inserted, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon,
                                   qdict_get_str(inserted, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(inserted, "ro"),
                            qdict_get_str(inserted, "drv"),
                            qdict_get_bool(inserted, "encrypted"));
    }

    monitor_printf(mon, "\n");
}
1735

    
1736
/* Print every entry of the list 'data' to 'mon' using bdrv_print_dict(). */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_print_dict, mon);
}
1740

    
1741
/*
 * Build a list describing every block device on bdrv_states and store it in
 * '*ret_data'.
 *
 * Each entry holds 'device', 'type', 'removable' and 'locked'. Devices with
 * a driver also get an 'inserted' dictionary holding 'file', 'ro', 'drv',
 * 'encrypted' and, when a backing file name is set, 'backing_file'.
 * 'mon' is not used.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, bs->removable,
                                    bs->locked);

        if (bs->drv) {
            QObject *inserted;

            inserted = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                          "'encrypted': %i }",
                                          bs->filename, bs->read_only,
                                          bs->drv->format_name,
                                          bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                qdict_put(qobject_to_qdict(inserted), "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(qobject_to_qdict(bs_obj), "inserted", inserted);
        }
        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
1778

    
1779
/*
 * qlist_iter() callback: print the device name and the read/write byte and
 * operation counters of one statistics entry to the monitor in 'opaque'.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    Monitor *mon = opaque;
    QDict *entry = qobject_to_qdict(data);
    QDict *stats;

    monitor_printf(mon, "%s:", qdict_get_str(entry, "device"));

    stats = qobject_to_qdict(qdict_get(entry, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        "\n",
                        qdict_get_int(stats, "rd_bytes"),
                        qdict_get_int(stats, "wr_bytes"),
                        qdict_get_int(stats, "rd_operations"),
                        qdict_get_int(stats, "wr_operations"));
}
1798

    
1799
/* Print every entry of the list 'data' to 'mon' using bdrv_stats_iter(). */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_stats_iter, mon);
}
1803

    
1804
/*
 * Build the statistics object for 'bs': a 'stats' dictionary with the
 * read/write byte and operation counters and the highest written offset in
 * bytes. 'device' is added when the device has a name, and 'parent' holds
 * the same kind of object for bs->file when there is one.
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *res;
    QDict *dict;

    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64
                             "} }",
                             bs->rd_bytes, bs->wr_bytes,
                             bs->rd_ops, bs->wr_ops,
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE);
    dict = qobject_to_qdict(res);

    if (bs->device_name[0]) {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        qdict_put_obj(dict, "parent", bdrv_info_stats_bs(bs->file));
    }

    return res;
}
1833

    
1834
/*
 * Collect the statistics objects of all block devices on bdrv_states into a
 * list and store it in '*ret_data'. 'mon' is not used.
 */
void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QList *devices = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        qlist_append_obj(devices, bdrv_info_stats_bs(bs));
    }

    *ret_data = QOBJECT(devices);
}
1849

    
1850
/*
 * Return the name of the encrypted image behind 'bs': the backing file name
 * if the backing file is encrypted, else the file name of 'bs' if it is
 * encrypted itself, else NULL.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }
    return NULL;
}
1859

    
1860
/*
 * Copy the backing file name of 'bs' into 'filename' (at most
 * 'filename_size' bytes, via pstrcpy). An empty string is copied if
 * bs->backing_file tests as null.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    const char *name = bs->backing_file ? bs->backing_file : "";

    pstrcpy(filename, filename_size, name);
}
1869

    
1870
/*
 * Write 'nb_sectors' sectors from 'buf' at 'sector_num' through the driver's
 * compressed write callback.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP if the driver cannot write
 * compressed data and -EIO if the request fails bdrv_check_request().
 * Otherwise the range is marked dirty (when a dirty bitmap exists) and the
 * driver's result is returned.
 */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
1887

    
1888
/*
 * Fill '*bdi' with driver specific information about 'bs'.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP if the driver has no
 * bdrv_get_info callback; in both cases '*bdi' is left untouched. Otherwise
 * '*bdi' is zeroed before the driver fills it in and the driver's result is
 * returned.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
1898

    
1899
/*
 * Save 'size' bytes of VM state from 'buf' at position 'pos'.
 *
 * The driver's bdrv_save_vmstate callback is used if present; otherwise the
 * request is forwarded to bs->file. Returns -ENOMEDIUM without a driver and
 * -ENOTSUP if neither path is available.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_save_vmstate) {
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    }
    if (bs->file) {
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    }
    return -ENOTSUP;
}
1911

    
1912
/*
 * Load 'size' bytes of VM state from position 'pos' into 'buf'.
 *
 * The driver's bdrv_load_vmstate callback is used if present; otherwise the
 * request is forwarded to bs->file. Returns -ENOMEDIUM without a driver and
 * -ENOTSUP if neither path is available.
 */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_load_vmstate) {
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    }
    if (bs->file) {
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    }
    return -ENOTSUP;
}
1924

    
1925
/*
 * Forward the debug event 'event' to the driver of 'bs'.
 * Nothing happens if there is no driver or the driver has no
 * bdrv_debug_event callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    /* plain call: a void function must not return an expression in ISO C */
    drv->bdrv_debug_event(bs, event);
}
1936

    
1937
/**************************************************************/
1938
/* handling of snapshots */
1939

    
1940
/*
 * Return 1 if snapshots can be taken on 'bs', 0 otherwise.
 *
 * Devices without a driver, removable devices and read-only devices never
 * qualify. If the driver itself cannot create snapshots the answer is taken
 * from the underlying bs->file, when there is one.
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (drv->bdrv_snapshot_create) {
        return 1;
    }

    return (bs->file != NULL) ? bdrv_can_snapshot(bs->file) : 0;
}
1956

    
1957
/* Return 1 if 'bs' was opened with BDRV_O_SNAPSHOT, 0 otherwise. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) ? 1 : 0;
}
1961

    
1962
BlockDriverState *bdrv_snapshots(void)
1963
{
1964
    BlockDriverState *bs;
1965

    
1966
    if (bs_snapshots) {
1967
        return bs_snapshots;
1968
    }
1969

    
1970
    bs = NULL;
1971
    while ((bs = bdrv_next(bs))) {
1972
        if (bdrv_can_snapshot(bs)) {
1973
            bs_snapshots = bs;
1974
            return bs;
1975
        }
1976
    }
1977
    return NULL;
1978
}
1979

    
1980
int bdrv_snapshot_create(BlockDriverState *bs,
1981
                         QEMUSnapshotInfo *sn_info)
1982
{
1983
    BlockDriver *drv = bs->drv;
1984
    if (!drv)
1985
        return -ENOMEDIUM;
1986
    if (drv->bdrv_snapshot_create)
1987
        return drv->bdrv_snapshot_create(bs, sn_info);
1988
    if (bs->file)
1989
        return bdrv_snapshot_create(bs->file, sn_info);
1990
    return -ENOTSUP;
1991
}
1992

    
1993
/*
 * Switch bs to the snapshot identified by snapshot_id.
 *
 * The driver's bdrv_snapshot_goto hook is used when present. Otherwise,
 * if there is an underlying bs->file, the format driver is closed, the
 * request is applied to bs->file, and the format driver is reopened with
 * the original open flags.
 *
 * Returns the result of the snapshot operation, -ENOMEDIUM when no driver
 * is attached, -ENOTSUP when no layer can handle the request, or the
 * negative result of bdrv_open when reopening the format driver fails. In
 * that last case bs->file is deleted and bs is left without a driver.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }
    if (!bs->file) {
        return -ENOTSUP;
    }

    drv->bdrv_close(bs);
    ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    open_ret = drv->bdrv_open(bs, bs->open_flags);
    if (open_ret < 0) {
        bdrv_delete(bs->file);
        /* Do not leave a dangling pointer to the deleted file behind. */
        bs->file = NULL;
        bs->drv = NULL;
        return open_ret;
    }
    return ret;
}
2018

    
2019
/*
 * Dispatch a snapshot-delete request to the driver's bdrv_snapshot_delete
 * hook, or to bs->file when the driver has none.
 *
 * Returns the callee's result, -ENOMEDIUM when no driver is attached, or
 * -ENOTSUP when no layer can handle the request.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_delete != NULL) {
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    }
    return bs->file ? bdrv_snapshot_delete(bs->file, snapshot_id) : -ENOTSUP;
}
2030

    
2031
int bdrv_snapshot_list(BlockDriverState *bs,
2032
                       QEMUSnapshotInfo **psn_info)
2033
{
2034
    BlockDriver *drv = bs->drv;
2035
    if (!drv)
2036
        return -ENOMEDIUM;
2037
    if (drv->bdrv_snapshot_list)
2038
        return drv->bdrv_snapshot_list(bs, psn_info);
2039
    if (bs->file)
2040
        return bdrv_snapshot_list(bs->file, psn_info);
2041
    return -ENOTSUP;
2042
}
2043

    
2044
/*
 * Dispatch to the driver's bdrv_snapshot_load_tmp hook for snapshot_name.
 *
 * Returns the hook's result, -ENOMEDIUM when no driver is attached,
 * -EINVAL when bs is not read-only, or -ENOTSUP when the driver has no
 * such hook. Unlike the other snapshot helpers there is no fallback to
 * bs->file.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp == NULL) {
        return -ENOTSUP;
    }
    return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
2059

    
2060
/* Number of unit suffixes known to get_human_readable_size() */
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes, via snprintf) in a short
 * human readable form and return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and tagged with K, M, G or T: one decimal place is used
 * while the scaled value is below 10, otherwise the value is rounded to the
 * nearest integer. Anything too large for the other units is printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs every remaining magnitude */
        if (idx == NB_SUFFIXES - 1 || size < 1000 * unit) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2089

    
2090
/*
 * Format one line of the snapshot table into buf (at most buf_size bytes)
 * and return buf.
 *
 * With sn == NULL the column headers are written. Otherwise the line
 * holds the snapshot's id, name, VM state size (human readable), creation
 * date in local time and VM clock as hh:mm:ss.mmm.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_buf[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t stamp;
    int64_t total_secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    /* Creation date, rendered in local time */
    stamp = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&stamp);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&stamp, &tm);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    /* VM clock is kept in nanoseconds; show it with millisecond precision */
    total_secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_buf, sizeof(clock_buf),
             "%02d:%02d:%02d.%03d",
             (int)(total_secs / 3600),
             (int)((total_secs / 60) % 60),
             (int)(total_secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    get_human_readable_size(size_buf, sizeof(size_buf), sn->vm_state_size);

    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             size_buf,
             date_buf,
             clock_buf);
    return buf;
}
2132

    
2133

    
2134
/**************************************************************/
2135
/* async I/Os */
2136

    
2137
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2138
                                 QEMUIOVector *qiov, int nb_sectors,
2139
                                 BlockDriverCompletionFunc *cb, void *opaque)
2140
{
2141
    BlockDriver *drv = bs->drv;
2142
    BlockDriverAIOCB *ret;
2143

    
2144
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2145

    
2146
    if (!drv)
2147
        return NULL;
2148
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2149
        return NULL;
2150

    
2151
    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2152
                              cb, opaque);
2153

    
2154
    if (ret) {
2155
        /* Update stats even though technically transfer has not happened. */
2156
        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2157
        bs->rd_ops ++;
2158
    }
2159

    
2160
    return ret;
2161
}
2162

    
2163
typedef struct BlockCompleteData {
2164
    BlockDriverCompletionFunc *cb;
2165
    void *opaque;
2166
    BlockDriverState *bs;
2167
    int64_t sector_num;
2168
    int nb_sectors;
2169
} BlockCompleteData;
2170

    
2171
static void block_complete_cb(void *opaque, int ret)
2172
{
2173
    BlockCompleteData *b = opaque;
2174

    
2175
    if (b->bs->dirty_bitmap) {
2176
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2177
    }
2178
    b->cb(b->opaque, ret);
2179
    qemu_free(b);
2180
}
2181

    
2182
static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2183
                                             int64_t sector_num,
2184
                                             int nb_sectors,
2185
                                             BlockDriverCompletionFunc *cb,
2186
                                             void *opaque)
2187
{
2188
    BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
2189

    
2190
    blkdata->bs = bs;
2191
    blkdata->cb = cb;
2192
    blkdata->opaque = opaque;
2193
    blkdata->sector_num = sector_num;
2194
    blkdata->nb_sectors = nb_sectors;
2195

    
2196
    return blkdata;
2197
}
2198

    
2199
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2200
                                  QEMUIOVector *qiov, int nb_sectors,
2201
                                  BlockDriverCompletionFunc *cb, void *opaque)
2202
{
2203
    BlockDriver *drv = bs->drv;
2204
    BlockDriverAIOCB *ret;
2205
    BlockCompleteData *blk_cb_data;
2206

    
2207
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2208

    
2209
    if (!drv)
2210
        return NULL;
2211
    if (bs->read_only)
2212
        return NULL;
2213
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2214
        return NULL;
2215

    
2216
    if (bs->dirty_bitmap) {
2217
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2218
                                         opaque);
2219
        cb = &block_complete_cb;
2220
        opaque = blk_cb_data;
2221
    }
2222

    
2223
    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2224
                               cb, opaque);
2225

    
2226
    if (ret) {
2227
        /* Update stats even though technically transfer has not happened. */
2228
        bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2229
        bs->wr_ops ++;
2230
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2231
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2232
        }
2233
    }
2234

    
2235
    return ret;
2236
}
2237

    
2238

    
2239
typedef struct MultiwriteCB {
2240
    int error;
2241
    int num_requests;
2242
    int num_callbacks;
2243
    struct {
2244
        BlockDriverCompletionFunc *cb;
2245
        void *opaque;
2246
        QEMUIOVector *free_qiov;
2247
        void *free_buf;
2248
    } callbacks[];
2249
} MultiwriteCB;
2250

    
2251
/*
 * Invoke every caller callback recorded in mcb with the collected error
 * code, releasing the merged vector and zero-fill buffer attached to each
 * slot right after its callback has run.
 */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int slot;

    for (slot = 0; slot < mcb->num_callbacks; slot++) {
        QEMUIOVector *merged;

        mcb->callbacks[slot].cb(mcb->callbacks[slot].opaque, mcb->error);

        merged = mcb->callbacks[slot].free_qiov;
        if (merged) {
            qemu_iovec_destroy(merged);
        }
        qemu_free(merged);
        qemu_vfree(mcb->callbacks[slot].free_buf);
    }
}
2264

    
2265
static void multiwrite_cb(void *opaque, int ret)
2266
{
2267
    MultiwriteCB *mcb = opaque;
2268

    
2269
    trace_multiwrite_cb(mcb, ret);
2270

    
2271
    if (ret < 0 && !mcb->error) {
2272
        mcb->error = ret;
2273
    }
2274

    
2275
    mcb->num_requests--;
2276
    if (mcb->num_requests == 0) {
2277
        multiwrite_user_cb(mcb);
2278
        qemu_free(mcb);
2279
    }
2280
}
2281

    
2282
static int multiwrite_req_compare(const void *a, const void *b)
2283
{
2284
    const BlockRequest *req1 = a, *req2 = b;
2285

    
2286
    /*
2287
     * Note that we can't simply subtract req2->sector from req1->sector
2288
     * here as that could overflow the return value.
2289
     */
2290
    if (req1->sector > req2->sector) {
2291
        return 1;
2292
    } else if (req1->sector < req2->sector) {
2293
        return -1;
2294
    } else {
2295
        return 0;
2296
    }
2297
}
2298

    
2299
/*
2300
 * Takes a bunch of requests and tries to merge them. Returns the number of
2301
 * requests that remain after merging.
2302
 */
2303
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2304
    int num_reqs, MultiwriteCB *mcb)
2305
{
2306
    int i, outidx;
2307

    
2308
    // Sort requests by start sector
2309
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2310

    
2311
    // Check if adjacent requests touch the same clusters. If so, combine them,
2312
    // filling up gaps with zero sectors.
2313
    outidx = 0;
2314
    for (i = 1; i < num_reqs; i++) {
2315
        int merge = 0;
2316
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2317

    
2318
        // This handles the cases that are valid for all block drivers, namely
2319
        // exactly sequential writes and overlapping writes.
2320
        if (reqs[i].sector <= oldreq_last) {
2321
            merge = 1;
2322
        }
2323

    
2324
        // The block driver may decide that it makes sense to combine requests
2325
        // even if there is a gap of some sectors between them. In this case,
2326
        // the gap is filled with zeros (therefore only applicable for yet
2327
        // unused space in format like qcow2).
2328
        if (!merge && bs->drv->bdrv_merge_requests) {
2329
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2330
        }
2331

    
2332
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2333
            merge = 0;
2334
        }
2335

    
2336
        if (merge) {
2337
            size_t size;
2338
            QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
2339
            qemu_iovec_init(qiov,
2340
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2341

    
2342
            // Add the first request to the merged one. If the requests are
2343
            // overlapping, drop the last sectors of the first request.
2344
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2345
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2346

    
2347
            // We might need to add some zeros between the two requests
2348
            if (reqs[i].sector > oldreq_last) {
2349
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2350
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2351
                memset(buf, 0, zero_bytes);
2352
                qemu_iovec_add(qiov, buf, zero_bytes);
2353
                mcb->callbacks[i].free_buf = buf;
2354
            }
2355

    
2356
            // Add the second request
2357
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2358

    
2359
            reqs[outidx].nb_sectors = qiov->size >> 9;
2360
            reqs[outidx].qiov = qiov;
2361

    
2362
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2363
        } else {
2364
            outidx++;
2365
            reqs[outidx].sector     = reqs[i].sector;
2366
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2367
            reqs[outidx].qiov       = reqs[i].qiov;
2368
        }
2369
    }
2370

    
2371
    return outidx + 1;
2372
}
2373

    
2374
/*
2375
 * Submit multiple AIO write requests at once.
2376
 *
2377
 * On success, the function returns 0 and all requests in the reqs array have
2378
 * been submitted. In error case this function returns -1, and any of the
2379
 * requests may or may not be submitted yet. In particular, this means that the
2380
 * callback will be called for some of the requests, for others it won't. The
2381
 * caller must check the error field of the BlockRequest to wait for the right
2382
 * callbacks (if error != 0, no callback will be called).
2383
 *
2384
 * The implementation may modify the contents of the reqs array, e.g. to merge
2385
 * requests. However, the fields opaque and error are left unmodified as they
2386
 * are used to signal failure for a single request to the caller.
2387
 */
2388
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2389
{
2390
    BlockDriverAIOCB *acb;
2391
    MultiwriteCB *mcb;
2392
    int i;
2393

    
2394
    /* don't submit writes if we don't have a medium */
2395
    if (bs->drv == NULL) {
2396
        for (i = 0; i < num_reqs; i++) {
2397
            reqs[i].error = -ENOMEDIUM;
2398
        }
2399
        return -1;
2400
    }
2401

    
2402
    if (num_reqs == 0) {
2403
        return 0;
2404
    }
2405

    
2406
    // Create MultiwriteCB structure
2407
    mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2408
    mcb->num_requests = 0;
2409
    mcb->num_callbacks = num_reqs;
2410

    
2411
    for (i = 0; i < num_reqs; i++) {
2412
        mcb->callbacks[i].cb = reqs[i].cb;
2413
        mcb->callbacks[i].opaque = reqs[i].opaque;
2414
    }
2415

    
2416
    // Check for mergable requests
2417
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2418

    
2419
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2420

    
2421
    /*
2422
     * Run the aio requests. As soon as one request can't be submitted
2423
     * successfully, fail all requests that are not yet submitted (we must
2424
     * return failure for all requests anyway)
2425
     *
2426
     * num_requests cannot be set to the right value immediately: If
2427
     * bdrv_aio_writev fails for some request, num_requests would be too high
2428
     * and therefore multiwrite_cb() would never recognize the multiwrite
2429
     * request as completed. We also cannot use the loop variable i to set it
2430
     * when the first request fails because the callback may already have been
2431
     * called for previously submitted requests. Thus, num_requests must be
2432
     * incremented for each request that is submitted.
2433
     *
2434
     * The problem that callbacks may be called early also means that we need
2435
     * to take care that num_requests doesn't become 0 before all requests are
2436
     * submitted - multiwrite_cb() would consider the multiwrite request
2437
     * completed. A dummy request that is "completed" by a manual call to
2438
     * multiwrite_cb() takes care of this.
2439
     */
2440
    mcb->num_requests = 1;
2441

    
2442
    // Run the aio requests
2443
    for (i = 0; i < num_reqs; i++) {
2444
        mcb->num_requests++;
2445
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2446
            reqs[i].nb_sectors, multiwrite_cb, mcb);
2447

    
2448
        if (acb == NULL) {
2449
            // We can only fail the whole thing if no request has been
2450
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2451
            // complete and report the error in the callback.
2452
            if (i == 0) {
2453
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2454
                goto fail;
2455
            } else {
2456
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2457
                multiwrite_cb(mcb, -EIO);
2458
                break;
2459
            }
2460
        }
2461
    }
2462

    
2463
    /* Complete the dummy request */
2464
    multiwrite_cb(mcb, 0);
2465

    
2466
    return 0;
2467

    
2468
fail:
2469
    for (i = 0; i < mcb->num_callbacks; i++) {
2470
        reqs[i].error = -EIO;
2471
    }
2472
    qemu_free(mcb);
2473
    return -1;
2474
}
2475

    
2476
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2477
        BlockDriverCompletionFunc *cb, void *opaque)
2478
{
2479
    BlockDriver *drv = bs->drv;
2480

    
2481
    trace_bdrv_aio_flush(bs, opaque);
2482

    
2483
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2484
        return bdrv_aio_noop_em(bs, cb, opaque);
2485
    }
2486

    
2487
    if (!drv)
2488
        return NULL;
2489
    return drv->bdrv_aio_flush(bs, cb, opaque);
2490
}
2491

    
2492
/* Cancel acb through the cancel hook of the pool it was taken from. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    AIOPool *pool = acb->pool;

    pool->cancel(acb);
}
2496

    
2497

    
2498
/**************************************************************/
2499
/* async block device emulation */
2500

    
2501
typedef struct BlockDriverAIOCBSync {
2502
    BlockDriverAIOCB common;
2503
    QEMUBH *bh;
2504
    int ret;
2505
    /* vector translation state */
2506
    QEMUIOVector *qiov;
2507
    uint8_t *bounce;
2508
    int is_write;
2509
} BlockDriverAIOCBSync;
2510

    
2511
/*
 * Cancel hook of bdrv_em_aio_pool: delete the pending bottom half so the
 * completion callback does not run, then return the AIOCB to its pool.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *sync_acb;

    sync_acb = container_of(blockacb, BlockDriverAIOCBSync, common);

    qemu_bh_delete(sync_acb->bh);
    sync_acb->bh = NULL;
    qemu_aio_release(sync_acb);
}
2519

    
2520
static AIOPool bdrv_em_aio_pool = {
2521
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2522
    .cancel             = bdrv_aio_cancel_em,
2523
};
2524

    
2525
static void bdrv_aio_bh_cb(void *opaque)
2526
{
2527
    BlockDriverAIOCBSync *acb = opaque;
2528

    
2529
    if (!acb->is_write)
2530
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2531
    qemu_vfree(acb->bounce);
2532
    acb->common.cb(acb->common.opaque, acb->ret);
2533
    qemu_bh_delete(acb->bh);
2534
    acb->bh = NULL;
2535
    qemu_aio_release(acb);
2536
}
2537

    
2538
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2539
                                            int64_t sector_num,
2540
                                            QEMUIOVector *qiov,
2541
                                            int nb_sectors,
2542
                                            BlockDriverCompletionFunc *cb,
2543
                                            void *opaque,
2544
                                            int is_write)
2545

    
2546
{
2547
    BlockDriverAIOCBSync *acb;
2548

    
2549
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2550
    acb->is_write = is_write;
2551
    acb->qiov = qiov;
2552
    acb->bounce = qemu_blockalign(bs, qiov->size);
2553

    
2554
    if (!acb->bh)
2555
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2556

    
2557
    if (is_write) {
2558
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2559
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2560
    } else {
2561
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2562
    }
2563

    
2564
    qemu_bh_schedule(acb->bh);
2565

    
2566
    return &acb->common;
2567
}
2568

    
2569
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2570
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2571
        BlockDriverCompletionFunc *cb, void *opaque)
2572
{
2573
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2574
}
2575

    
2576
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2577
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2578
        BlockDriverCompletionFunc *cb, void *opaque)
2579
{
2580
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2581
}
2582

    
2583
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2584
        BlockDriverCompletionFunc *cb, void *opaque)
2585
{
2586
    BlockDriverAIOCBSync *acb;
2587

    
2588
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2589
    acb->is_write = 1; /* don't bounce in the completion hadler */
2590
    acb->qiov = NULL;
2591
    acb->bounce = NULL;
2592
    acb->ret = 0;
2593

    
2594
    if (!acb->bh)
2595
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2596

    
2597
    bdrv_flush(bs);
2598
    qemu_bh_schedule(acb->bh);
2599
    return &acb->common;
2600
}
2601

    
2602
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2603
        BlockDriverCompletionFunc *cb, void *opaque)
2604
{
2605
    BlockDriverAIOCBSync *acb;
2606

    
2607
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2608
    acb->is_write = 1; /* don't bounce in the completion handler */
2609
    acb->qiov = NULL;
2610
    acb->bounce = NULL;
2611
    acb->ret = 0;
2612

    
2613
    if (!acb->bh) {
2614
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2615
    }
2616

    
2617
    qemu_bh_schedule(acb->bh);
2618
    return &acb->common;
2619
}
2620

    
2621
/**************************************************************/
2622
/* sync block device emulation */
2623

    
2624
/*
 * Completion callback for the synchronous emulation: opaque points to an
 * int that receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2628

    
2629
#define NOT_DONE 0x7fffffff
2630

    
2631
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2632
                        uint8_t *buf, int nb_sectors)
2633
{
2634
    int async_ret;
2635
    BlockDriverAIOCB *acb;
2636
    struct iovec iov;
2637
    QEMUIOVector qiov;
2638

    
2639
    async_context_push();
2640

    
2641
    async_ret = NOT_DONE;
2642
    iov.iov_base = (void *)buf;
2643
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2644
    qemu_iovec_init_external(&qiov, &iov, 1);
2645
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2646
        bdrv_rw_em_cb, &async_ret);
2647
    if (acb == NULL) {
2648
        async_ret = -1;
2649
        goto fail;
2650
    }
2651

    
2652
    while (async_ret == NOT_DONE) {
2653
        qemu_aio_wait();
2654
    }
2655

    
2656

    
2657
fail:
2658
    async_context_pop();
2659
    return async_ret;
2660
}
2661

    
2662
/*
 * Emulate a synchronous write on top of bdrv_aio_writev().
 *
 * The request is issued inside its own async context and qemu_aio_wait()
 * is called until the completion callback has stored a result. Returns
 * that result, or -1 if the request could not be submitted.
 */
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    async_context_push();

    /* struct iovec has no const variant; the data is only read */
    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb != NULL) {
        while (async_ret == NOT_DONE) {
            qemu_aio_wait();
        }
    } else {
        async_ret = -1;
    }

    async_context_pop();
    return async_ret;
}
2690

    
2691
void bdrv_init(void)
2692
{
2693
    module_call_init(MODULE_INIT_BLOCK);
2694
}
2695

    
2696
void bdrv_init_with_whitelist(void)
2697
{
2698
    use_bdrv_whitelist = 1;
2699
    bdrv_init();
2700
}
2701

    
2702
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2703
                   BlockDriverCompletionFunc *cb, void *opaque)
2704
{
2705
    BlockDriverAIOCB *acb;
2706

    
2707
    if (pool->free_aiocb) {
2708
        acb = pool->free_aiocb;
2709
        pool->free_aiocb = acb->next;
2710
    } else {
2711
        acb = qemu_mallocz(pool->aiocb_size);
2712
        acb->pool = pool;
2713
    }
2714
    acb->bs = bs;
2715
    acb->cb = cb;
2716
    acb->opaque = opaque;
2717
    return acb;
2718
}
2719

    
2720
void qemu_aio_release(void *p)
2721
{
2722
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2723
    AIOPool *pool = acb->pool;
2724
    acb->next = pool->free_aiocb;
2725
    pool->free_aiocb = acb;
2726
}
2727

    
2728
/**************************************************************/
2729
/* removable device support */
2730

    
2731
/**
2732
 * Return TRUE if the media is present
2733
 */
2734
int bdrv_is_inserted(BlockDriverState *bs)
2735
{
2736
    BlockDriver *drv = bs->drv;
2737
    int ret;
2738
    if (!drv)
2739
        return 0;
2740
    if (!drv->bdrv_is_inserted)
2741
        return !bs->tray_open;
2742
    ret = drv->bdrv_is_inserted(bs);
2743
    return ret;
2744
}
2745

    
2746
/**
2747
 * Return TRUE if the media changed since the last call to this
2748
 * function. It is currently only used for floppy disks
2749
 */
2750
int bdrv_media_changed(BlockDriverState *bs)
2751
{
2752
    BlockDriver *drv = bs->drv;
2753
    int ret;
2754

    
2755
    if (!drv || !drv->bdrv_media_changed)
2756
        ret = -ENOTSUP;
2757
    else
2758
        ret = drv->bdrv_media_changed(bs);
2759
    if (ret == -ENOTSUP)
2760
        ret = bs->media_changed;
2761
    bs->media_changed = 0;
2762
    return ret;
2763
}
2764

    
2765
/**
2766
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2767
 */
2768
int bdrv_eject(BlockDriverState *bs, int eject_flag)
2769
{
2770
    BlockDriver *drv = bs->drv;
2771
    int ret;
2772

    
2773
    if (bs->locked) {
2774
        return -EBUSY;
2775
    }
2776

    
2777
    if (!drv || !drv->bdrv_eject) {
2778
        ret = -ENOTSUP;
2779
    } else {
2780
        ret = drv->bdrv_eject(bs, eject_flag);
2781
    }
2782
    if (ret == -ENOTSUP) {
2783
        ret = 0;
2784
    }
2785
    if (ret >= 0) {
2786
        bs->tray_open = eject_flag;
2787
    }
2788

    
2789
    return ret;
2790
}
2791

    
2792
/* Return the lock state last stored in bs->locked. */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}
2796

    
2797
/**
2798
 * Lock or unlock the media (if it is locked, the user won't be able
2799
 * to eject it manually).
2800
 */
2801
void bdrv_set_locked(BlockDriverState *bs, int locked)
2802
{
2803
    BlockDriver *drv = bs->drv;
2804

    
2805
    trace_bdrv_set_locked(bs, locked);
2806

    
2807
    bs->locked = locked;
2808
    if (drv && drv->bdrv_set_locked) {
2809
        drv->bdrv_set_locked(bs, locked);
2810
    }
2811
}
2812

    
2813
/* needed for generic scsi interface */
2814

    
2815
/*
 * Pass an ioctl request to the driver's bdrv_ioctl hook.
 * Returns the hook's result, or -ENOTSUP when there is no driver or hook.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_ioctl) {
        return -ENOTSUP;
    }
    return drv->bdrv_ioctl(bs, req, buf);
}
2823

    
2824
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2825
        unsigned long int req, void *buf,
2826
        BlockDriverCompletionFunc *cb, void *opaque)
2827
{
2828
    BlockDriver *drv = bs->drv;
2829

    
2830
    if (drv && drv->bdrv_aio_ioctl)
2831
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2832
    return NULL;
2833
}
2834

    
2835

    
2836

    
2837
/*
 * Allocate size bytes with qemu_memalign(), aligned to
 * bs->buffer_alignment. An alignment of 512 is used when bs is NULL or
 * its buffer_alignment is zero.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    size_t alignment = 512;

    if (bs && bs->buffer_alignment) {
        alignment = bs->buffer_alignment;
    }
    return qemu_memalign(alignment, size);
}
2841

    
2842
/*
 * Enable or disable dirty tracking on bs. The dirty counter is reset in
 * both cases.
 *
 * Enabling allocates a zeroed bitmap with one bit per chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors, unless one already exists.
 * Disabling frees the bitmap.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    bs->dirty_count = 0;

    if (!enable) {
        if (bs->dirty_bitmap) {
            qemu_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
        return;
    }

    if (!bs->dirty_bitmap) {
        /*
         * The bitmap is indexed in units of unsigned long, so size it in
         * whole words. Rounding up only to whole bytes would leave the
         * last word partly outside the allocation.
         */
        const int64_t sectors_per_word =
            (int64_t)(BDRV_SECTORS_PER_DIRTY_CHUNK * sizeof(unsigned long) * 8);
        int64_t nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        int64_t bitmap_words =
            (nb_sectors + sectors_per_word - 1) / sectors_per_word;

        bs->dirty_bitmap = qemu_mallocz(bitmap_words * sizeof(unsigned long));
    }
}
2862

    
2863
/*
 * Return 1 if the dirty-tracking chunk containing sector is marked dirty,
 * 0 otherwise.
 *
 * Also returns 0 when dirty tracking is disabled, when sector is negative,
 * or when sector lies at or beyond the length reported by bdrv_getlength().
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    /* A negative sector would index before the start of the bitmap */
    if (sector < 0 || !bs->dirty_bitmap) {
        return 0;
    }
    if ((sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
        (1UL << (chunk % (sizeof(unsigned long) * 8))));
}
2875

    
2876
/*
 * Clear the dirty state of nr_sectors sectors starting at cur_sector by
 * calling set_dirty_bitmap() with a value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    const int dirty = 0;

    set_dirty_bitmap(bs, cur_sector, nr_sectors, dirty);
}
2881

    
2882
/* Return the current value of bs->dirty_count. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
2886

    
2887
/*
 * Set the in_use flag of bs. The new value must differ from the current
 * one; this is enforced with an assertion.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
2892

    
2893
/* Return the in_use flag of bs. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
2897

    
2898
int bdrv_img_create(const char *filename, const char *fmt,
2899
                    const char *base_filename, const char *base_fmt,
2900
                    char *options, uint64_t img_size, int flags)
2901
{
2902
    QEMUOptionParameter *param = NULL, *create_options = NULL;
2903
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
2904
    BlockDriverState *bs = NULL;
2905
    BlockDriver *drv, *proto_drv;
2906
    BlockDriver *backing_drv = NULL;
2907
    int ret = 0;
2908

    
2909
    /* Find driver and parse its options */
2910
    drv = bdrv_find_format(fmt);
2911
    if (!drv) {
2912
        error_report("Unknown file format '%s'", fmt);
2913
        ret = -EINVAL;
2914
        goto out;
2915
    }
2916

    
2917
    proto_drv = bdrv_find_protocol(filename);
2918
    if (!proto_drv) {
2919
        error_report("Unknown protocol '%s'", filename);
2920
        ret = -EINVAL;
2921
        goto out;
2922
    }
2923

    
2924
    create_options = append_option_parameters(create_options,
2925
                                              drv->create_options);
2926
    create_options = append_option_parameters(create_options,
2927
                                              proto_drv->create_options);
2928

    
2929
    /* Create parameter list with default values */
2930
    param = parse_option_parameters("", create_options, param);
2931

    
2932
    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
2933

    
2934
    /* Parse -o options */
2935
    if (options) {
2936
        param = parse_option_parameters(options, create_options, param);
2937
        if (param == NULL) {
2938
            error_report("Invalid options for file format '%s'.", fmt);
2939
            ret = -EINVAL;
2940
            goto out;
2941
        }
2942
    }
2943

    
2944
    if (base_filename) {
2945
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
2946
                                 base_filename)) {
2947
            error_report("Backing file not supported for file format '%s'",
2948
                         fmt);
2949
            ret = -EINVAL;
2950
            goto out;
2951
        }
2952
    }
2953

    
2954
    if (base_fmt) {
2955
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
2956
            error_report("Backing file format not supported for file "
2957
                         "format '%s'", fmt);
2958
            ret = -EINVAL;
2959
            goto out;
2960
        }
2961
    }
2962

    
2963
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
2964
    if (backing_file && backing_file->value.s) {
2965
        if (!strcmp(filename, backing_file->value.s)) {
2966
            error_report("Error: Trying to create an image with the "
2967
                         "same filename as the backing file");
2968
            ret = -EINVAL;
2969
            goto out;
2970
        }
2971
    }
2972

    
2973
    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
2974
    if (backing_fmt && backing_fmt->value.s) {
2975
        backing_drv = bdrv_find_format(backing_fmt->value.s);
2976
        if (!backing_drv) {
2977
            error_report("Unknown backing file format '%s'",
2978
                         backing_fmt->value.s);
2979
            ret = -EINVAL;
2980
            goto out;
2981
        }
2982
    }
2983

    
2984
    // The size for the image must always be specified, with one exception:
2985
    // If we are using a backing file, we can obtain the size from there
2986
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
2987
    if (size && size->value.n == -1) {
2988
        if (backing_file && backing_file->value.s) {
2989
            uint64_t size;
2990
            char buf[32];
2991

    
2992
            bs = bdrv_new("");
2993

    
2994
            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
2995
            if (ret < 0) {
2996
                error_report("Could not open '%s'", backing_file->value.s);
2997
                goto out;
2998
            }
2999
            bdrv_get_geometry(bs, &size);
3000
            size *= 512;
3001

    
3002
            snprintf(buf, sizeof(buf), "%" PRId64, size);
3003
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3004
        } else {
3005
            error_report("Image creation needs a size parameter");
3006
            ret = -EINVAL;
3007
            goto out;
3008
        }
3009
    }
3010

    
3011
    printf("Formatting '%s', fmt=%s ", filename, fmt);
3012
    print_option_parameters(param);
3013
    puts("");
3014

    
3015
    ret = bdrv_create(drv, filename, param);
3016

    
3017
    if (ret < 0) {
3018
        if (ret == -ENOTSUP) {
3019
            error_report("Formatting or formatting option not supported for "
3020
                         "file format '%s'", fmt);
3021
        } else if (ret == -EFBIG) {
3022
            error_report("The image size is too large for file format '%s'",
3023
                         fmt);
3024
        } else {
3025
            error_report("%s: error while creating %s: %s", filename, fmt,
3026
                         strerror(-ret));
3027
        }
3028
    }
3029

    
3030
out:
3031
    free_option_parameters(create_options);
3032
    free_option_parameters(param);
3033

    
3034
    if (bs) {
3035
        bdrv_delete(bs);
3036
    }
3037

    
3038
    return ret;
3039
}