Statistics
| Branch: | Revision:

root / block.c @ d220894e

History | View | Annotate | Download (80.7 kB)

1
/*
2
 * QEMU System Emulator block driver
3
 *
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "config-host.h"
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "qemu-objects.h"
31

    
32
#ifdef CONFIG_BSD
33
#include <sys/types.h>
34
#include <sys/stat.h>
35
#include <sys/ioctl.h>
36
#include <sys/queue.h>
37
#ifndef __DragonFly__
38
#include <sys/disk.h>
39
#endif
40
#endif
41

    
42
#ifdef _WIN32
43
#include <windows.h>
44
#endif
45

    
46
/*
 * Forward declarations of the emulation helpers.  bdrv_register() installs
 * them as fallbacks for driver callbacks that a driver leaves unset.
 */
/* AIO read/write emulation: used when a driver has no bdrv_aio_readv. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
47
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
48
        BlockDriverCompletionFunc *cb, void *opaque);
49
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
50
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
51
        BlockDriverCompletionFunc *cb, void *opaque);
52
/* AIO flush emulation: used when a driver has no bdrv_aio_flush. */
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
53
        BlockDriverCompletionFunc *cb, void *opaque);
54
/* NOTE(review): not referenced in the visible part of this file. */
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
55
        BlockDriverCompletionFunc *cb, void *opaque);
56
/* Synchronous read/write emulation: used when a driver provides
   bdrv_aio_readv but no bdrv_read. */
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
57
                        uint8_t *buf, int nb_sectors);
58
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
59
                         const uint8_t *buf, int nb_sectors);
60

    
61
/* List of named block devices: bdrv_new() appends every state created with
   a non-empty device name, bdrv_make_anon() removes it again. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
62
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
63

    
64
/* List of registered block drivers; bdrv_register() inserts each driver at
   the head, so the most recently registered driver is probed first. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
65
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
66

    
67
/* The device to use for VM snapshots; bdrv_close() resets it to NULL when
   that device is closed */
68
static BlockDriverState *bs_snapshots;
69

    
70
/* If non-zero, use only whitelisted block drivers; bdrv_open_common()
   rejects any other driver with -ENOTSUP */
71
static int use_bdrv_whitelist;
72

    
73
#ifdef _WIN32
74
/* Return 1 if filename starts with an ASCII drive letter immediately
   followed by ':' (e.g. "c:" or "D:/dir"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Not a drive letter; do not look at the second character. */
        return 0;
    }
    return filename[1] == ':';
}
80

    
81
/* Return 1 if filename designates a whole Windows drive: either a bare
   drive specification (drive letter, ':' and nothing else) or a path that
   starts with one of the two device-namespace prefixes tested below.
   Return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    /* Device namespace, spelled with backslashes or with forward slashes. */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
91
#endif
92

    
93
/* Return 1 if path carries a "<protocol>:" prefix, which is detected simply
   by the presence of a ':' anywhere in the string; return 0 otherwise.
   On Windows, drive specifications are never treated as a protocol. */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive_prefix(path) || is_windows_drive(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
105

    
106
/* Return 1 if path is absolute, 0 otherwise.  Anything up to and including
   the first ':' is skipped before the test, so "proto:/x" counts as
   absolute.  On Windows a backslash is accepted as well as '/', and a path
   that itself starts with either separator is absolute outright. */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *start = colon ? colon + 1 : path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
125

    
126
/* Build in dest (at most dest_size bytes, always NUL-terminated when
   dest_size > 0) the location of filename relative to base_path.
   An absolute filename is copied unchanged.  Otherwise the directory part
   of base_path is kept (everything up to its last path separator, or up to
   its "<protocol>:" prefix if that ends later) and filename is appended to
   it.  URLs are supported.  Nothing is written when dest_size <= 0. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* First character after the "<protocol>:" part, if there is one. */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* First character after the last path separator, if there is one. */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path. */
    dir_end = (after_sep > after_proto) ? after_sep : after_proto;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
169

    
170
/* Register a block driver.  Missing callbacks are filled in with the
   emulation helpers first: a driver without AIO read gets emulated AIO
   read/write, a driver with AIO but without synchronous read gets emulated
   synchronous read/write, and a driver without AIO flush gets the emulated
   flush.  The driver is then put at the head of the driver list. */
void bdrv_register(BlockDriver *bdrv)
{
    if (bdrv->bdrv_aio_readv) {
        if (!bdrv->bdrv_read) {
            /* add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    } else {
        /* add AIO emulation layer */
        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
    }

    if (bdrv->bdrv_aio_flush == NULL) {
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
187

    
188
/* create a new block device (by default it is empty) */
189
BlockDriverState *bdrv_new(const char *device_name)
190
{
191
    BlockDriverState *bs;
192

    
193
    bs = qemu_mallocz(sizeof(BlockDriverState));
194
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
195
    if (device_name[0] != '\0') {
196
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
197
    }
198
    return bs;
199
}
200

    
201
BlockDriver *bdrv_find_format(const char *format_name)
202
{
203
    BlockDriver *drv1;
204
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
205
        if (!strcmp(drv1->format_name, format_name)) {
206
            return drv1;
207
        }
208
    }
209
    return NULL;
210
}
211

    
212
/* Return 1 if drv may be used according to the build-time whitelist
   CONFIG_BDRV_WHITELIST, 0 otherwise.  The list is walked up to its first
   NULL entry; a list whose first entry is NULL allows every driver. */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    if (whitelist[0] == NULL) {
        return 1;               /* no whitelist, anything goes */
    }

    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
229

    
230
/* Like bdrv_find_format(), but returns NULL as well when the driver exists
   and is not on the whitelist. */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv == NULL || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
235

    
236
int bdrv_create(BlockDriver *drv, const char* filename,
237
    QEMUOptionParameter *options)
238
{
239
    if (!drv->bdrv_create)
240
        return -ENOTSUP;
241

    
242
    return drv->bdrv_create(filename, options);
243
}
244

    
245
/* Create filename using the protocol driver that bdrv_find_protocol()
   selects for it.  Returns -ENOENT when no protocol driver matches,
   otherwise the result of bdrv_create(). */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto = bdrv_find_protocol(filename);

    return proto ? bdrv_create(proto, filename, options) : -ENOENT;
}
256

    
257
#ifdef _WIN32
258
void get_tmp_filename(char *filename, int size)
259
{
260
    char temp_dir[MAX_PATH];
261

    
262
    GetTempPath(MAX_PATH, temp_dir);
263
    GetTempFileName(temp_dir, "qem", 0, filename);
264
}
265
#else
266
/* Create a uniquely named temporary file in $TMPDIR (or /tmp when TMPDIR
   is unset) and store its name in filename (a buffer of size bytes).
   The file is created and closed again, so only the name is handed back.
   If the file cannot be created (for example because the name template did
   not fit into the buffer) filename is set to the empty string, so callers
   never receive a predictable name that nobody has claimed.  Nothing is
   written when size <= 0. */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    /* XXX: race condition possible, the file is reopened by name later */
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
278
#endif
279

    
280
/*
281
 * Detect host devices. By convention, /dev/cdrom[N] is always
282
 * recognized as a host CDROM.
283
 */
284
static BlockDriver *find_hdev_driver(const char *filename)
285
{
286
    int score_max = 0, score;
287
    BlockDriver *drv = NULL, *d;
288

    
289
    QLIST_FOREACH(d, &bdrv_drivers, list) {
290
        if (d->bdrv_probe_device) {
291
            score = d->bdrv_probe_device(filename);
292
            if (score > score_max) {
293
                score_max = score;
294
                drv = d;
295
            }
296
        }
297
    }
298

    
299
    return drv;
300
}
301

    
302
BlockDriver *bdrv_find_protocol(const char *filename)
303
{
304
    BlockDriver *drv1;
305
    char protocol[128];
306
    int len;
307
    const char *p;
308

    
309
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
310

    
311
    /*
312
     * XXX(hch): we really should not let host device detection
313
     * override an explicit protocol specification, but moving this
314
     * later breaks access to device names with colons in them.
315
     * Thanks to the brain-dead persistent naming schemes on udev-
316
     * based Linux systems those actually are quite common.
317
     */
318
    drv1 = find_hdev_driver(filename);
319
    if (drv1) {
320
        return drv1;
321
    }
322

    
323
    if (!path_has_protocol(filename)) {
324
        return bdrv_find_format("file");
325
    }
326
    p = strchr(filename, ':');
327
    assert(p != NULL);
328
    len = p - filename;
329
    if (len > sizeof(protocol) - 1)
330
        len = sizeof(protocol) - 1;
331
    memcpy(protocol, filename, len);
332
    protocol[len] = '\0';
333
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
334
        if (drv1->protocol_name &&
335
            !strcmp(drv1->protocol_name, protocol)) {
336
            return drv1;
337
        }
338
    }
339
    return NULL;
340
}
341

    
342
/* Guess the image format of filename and store the driver in *pdrv.
   The file is opened through its protocol and its first 2048 bytes are
   handed to the bdrv_probe callback of every registered driver; the
   highest positive score wins.  scsi-generic devices and drives without
   medium are not probed and get the "raw" driver.
   On failure *pdrv is NULL and a negative errno is returned (-ENOENT when
   no driver fits).  On success the return value is non-negative; after
   probing it is the value returned by bdrv_pread(). */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    uint8_t probe_buf[2048];
    BlockDriverState *file_bs;
    BlockDriver *candidate;
    BlockDriver *best = NULL;
    int best_score = 0;
    int ret;

    ret = bdrv_file_open(&file_bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (file_bs->sg || !bdrv_is_inserted(file_bs)) {
        bdrv_delete(file_bs);
        best = bdrv_find_format("raw");
        *pdrv = best;
        return best ? ret : -ENOENT;
    }

    ret = bdrv_pread(file_bs, 0, probe_buf, sizeof(probe_buf));
    bdrv_delete(file_bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe) {
            continue;
        }
        score = candidate->bdrv_probe(probe_buf, ret, filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    *pdrv = best;
    return best ? ret : -ENOENT;
}
390

    
391
/**
392
 * Set the current 'total_sectors' value
393
 */
394
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
395
{
396
    BlockDriver *drv = bs->drv;
397

    
398
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
399
    if (bs->sg)
400
        return 0;
401

    
402
    /* query actual device if possible, otherwise just trust the hint */
403
    if (drv->bdrv_getlength) {
404
        int64_t length = drv->bdrv_getlength(bs);
405
        if (length < 0) {
406
            return length;
407
        }
408
        hint = length >> BDRV_SECTOR_BITS;
409
    }
410

    
411
    bs->total_sectors = hint;
412
    return 0;
413
}
414

    
415
/*
416
 * Common part for opening disk images and files
417
 */
418
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
419
    int flags, BlockDriver *drv)
420
{
421
    int ret, open_flags;
422

    
423
    assert(drv != NULL);
424

    
425
    bs->file = NULL;
426
    bs->total_sectors = 0;
427
    bs->encrypted = 0;
428
    bs->valid_key = 0;
429
    bs->open_flags = flags;
430
    /* buffer_alignment defaulted to 512, drivers can change this value */
431
    bs->buffer_alignment = 512;
432

    
433
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
434

    
435
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
436
        return -ENOTSUP;
437
    }
438

    
439
    bs->drv = drv;
440
    bs->opaque = qemu_mallocz(drv->instance_size);
441

    
442
    if (flags & BDRV_O_CACHE_WB)
443
        bs->enable_write_cache = 1;
444

    
445
    /*
446
     * Clear flags that are internal to the block layer before opening the
447
     * image.
448
     */
449
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
450

    
451
    /*
452
     * Snapshots should be writable.
453
     */
454
    if (bs->is_temporary) {
455
        open_flags |= BDRV_O_RDWR;
456
    }
457

    
458
    /* Open the image, either directly or using a protocol */
459
    if (drv->bdrv_file_open) {
460
        ret = drv->bdrv_file_open(bs, filename, open_flags);
461
    } else {
462
        ret = bdrv_file_open(&bs->file, filename, open_flags);
463
        if (ret >= 0) {
464
            ret = drv->bdrv_open(bs, open_flags);
465
        }
466
    }
467

    
468
    if (ret < 0) {
469
        goto free_and_fail;
470
    }
471

    
472
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
473

    
474
    ret = refresh_total_sectors(bs, bs->total_sectors);
475
    if (ret < 0) {
476
        goto free_and_fail;
477
    }
478

    
479
#ifndef _WIN32
480
    if (bs->is_temporary) {
481
        unlink(filename);
482
    }
483
#endif
484
    return 0;
485

    
486
free_and_fail:
487
    if (bs->file) {
488
        bdrv_delete(bs->file);
489
        bs->file = NULL;
490
    }
491
    qemu_free(bs->opaque);
492
    bs->opaque = NULL;
493
    bs->drv = NULL;
494
    return ret;
495
}
496

    
497
/*
498
 * Opens a file using a protocol (file, host_device, nbd, ...)
499
 */
500
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
501
{
502
    BlockDriverState *bs;
503
    BlockDriver *drv;
504
    int ret;
505

    
506
    drv = bdrv_find_protocol(filename);
507
    if (!drv) {
508
        return -ENOENT;
509
    }
510

    
511
    bs = bdrv_new("");
512
    ret = bdrv_open_common(bs, filename, flags, drv);
513
    if (ret < 0) {
514
        bdrv_delete(bs);
515
        return ret;
516
    }
517
    bs->growable = 1;
518
    *pbs = bs;
519
    return 0;
520
}
521

    
522
/*
523
 * Opens a disk image (raw, qcow2, vmdk, ...)
524
 */
525
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
526
              BlockDriver *drv)
527
{
528
    int ret;
529

    
530
    if (flags & BDRV_O_SNAPSHOT) {
531
        BlockDriverState *bs1;
532
        int64_t total_size;
533
        int is_protocol = 0;
534
        BlockDriver *bdrv_qcow2;
535
        QEMUOptionParameter *options;
536
        char tmp_filename[PATH_MAX];
537
        char backing_filename[PATH_MAX];
538

    
539
        /* if snapshot, we create a temporary backing file and open it
540
           instead of opening 'filename' directly */
541

    
542
        /* if there is a backing file, use it */
543
        bs1 = bdrv_new("");
544
        ret = bdrv_open(bs1, filename, 0, drv);
545
        if (ret < 0) {
546
            bdrv_delete(bs1);
547
            return ret;
548
        }
549
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
550

    
551
        if (bs1->drv && bs1->drv->protocol_name)
552
            is_protocol = 1;
553

    
554
        bdrv_delete(bs1);
555

    
556
        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
557

    
558
        /* Real path is meaningless for protocols */
559
        if (is_protocol)
560
            snprintf(backing_filename, sizeof(backing_filename),
561
                     "%s", filename);
562
        else if (!realpath(filename, backing_filename))
563
            return -errno;
564

    
565
        bdrv_qcow2 = bdrv_find_format("qcow2");
566
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
567

    
568
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
569
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
570
        if (drv) {
571
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
572
                drv->format_name);
573
        }
574

    
575
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
576
        free_option_parameters(options);
577
        if (ret < 0) {
578
            return ret;
579
        }
580

    
581
        filename = tmp_filename;
582
        drv = bdrv_qcow2;
583
        bs->is_temporary = 1;
584
    }
585

    
586
    /* Find the right image format driver */
587
    if (!drv) {
588
        ret = find_image_format(filename, &drv);
589
    }
590

    
591
    if (!drv) {
592
        goto unlink_and_fail;
593
    }
594

    
595
    /* Open the image */
596
    ret = bdrv_open_common(bs, filename, flags, drv);
597
    if (ret < 0) {
598
        goto unlink_and_fail;
599
    }
600

    
601
    /* If there is a backing file, use it */
602
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
603
        char backing_filename[PATH_MAX];
604
        int back_flags;
605
        BlockDriver *back_drv = NULL;
606

    
607
        bs->backing_hd = bdrv_new("");
608

    
609
        if (path_has_protocol(bs->backing_file)) {
610
            pstrcpy(backing_filename, sizeof(backing_filename),
611
                    bs->backing_file);
612
        } else {
613
            path_combine(backing_filename, sizeof(backing_filename),
614
                         filename, bs->backing_file);
615
        }
616

    
617
        if (bs->backing_format[0] != '\0') {
618
            back_drv = bdrv_find_format(bs->backing_format);
619
        }
620

    
621
        /* backing files always opened read-only */
622
        back_flags =
623
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
624

    
625
        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
626
        if (ret < 0) {
627
            bdrv_close(bs);
628
            return ret;
629
        }
630
        if (bs->is_temporary) {
631
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
632
        } else {
633
            /* base image inherits from "parent" */
634
            bs->backing_hd->keep_read_only = bs->keep_read_only;
635
        }
636
    }
637

    
638
    if (!bdrv_key_required(bs)) {
639
        /* call the change callback */
640
        bs->media_changed = 1;
641
        if (bs->change_cb)
642
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
643
    }
644

    
645
    return 0;
646

    
647
unlink_and_fail:
648
    if (bs->is_temporary) {
649
        unlink(filename);
650
    }
651
    return ret;
652
}
653

    
654
/* Close the image held by bs.  Nothing happens when no driver is attached.
   Otherwise the backing image is deleted, the driver's close callback
   runs, the driver state is freed, the underlying protocol file (if any)
   is closed, and the change callback is told that the medium changed.
   bs itself is not freed. */
void bdrv_close(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }
    if (bs->backing_hd != NULL) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
    }

    bs->drv->bdrv_close(bs);
    qemu_free(bs->opaque);
#ifdef _WIN32
    /* On Windows the temporary image is only removed here, on close */
    if (bs->is_temporary) {
        unlink(bs->filename);
    }
#endif
    bs->opaque = NULL;
    bs->drv = NULL;

    if (bs->file) {
        bdrv_close(bs->file);
    }

    /* call the change callback */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}
684

    
685
void bdrv_close_all(void)
686
{
687
    BlockDriverState *bs;
688

    
689
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
690
        bdrv_close(bs);
691
    }
692
}
693

    
694
/* make a BlockDriverState anonymous by removing from bdrv_state list.
695
   Also, NULL terminate the device_name to prevent double remove */
696
void bdrv_make_anon(BlockDriverState *bs)
697
{
698
    if (bs->device_name[0] != '\0') {
699
        QTAILQ_REMOVE(&bdrv_states, bs, list);
700
    }
701
    bs->device_name[0] = '\0';
702
}
703

    
704
/* Destroy bs: remove it from the device list, close it, delete its
   underlying protocol file (if any) and free the state itself.
   The state must not be attached to a device any more. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->peer == NULL);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file) {
        bdrv_delete(bs->file);
    }

    /* bdrv_close() clears bs_snapshots for a state that has a driver */
    assert(bs_snapshots != bs);
    qemu_free(bs);
}
719

    
720
/* Record qdev as the device attached to bs.
   Returns 0 on success, or -EBUSY when bs already has a device attached. */
int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    int rc = -EBUSY;

    if (!bs->peer) {
        bs->peer = qdev;
        rc = 0;
    }
    return rc;
}
728

    
729
/* Detach qdev from bs.  qdev must be the device currently attached. */
void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(qdev == bs->peer);
    bs->peer = NULL;
}
734

    
735
/* Return the device attached to bs, or NULL when none is attached. */
DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    DeviceState *attached = bs->peer;

    return attached;
}
739

    
740
/*
741
 * Run consistency checks on an image
742
 *
743
 * Returns 0 if the check could be completed (it doesn't mean that the image is
744
 * free of errors) or -errno when an internal error occurred. The results of the
745
 * check are stored in res.
746
 */
747
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
748
{
749
    if (bs->drv->bdrv_check == NULL) {
750
        return -ENOTSUP;
751
    }
752

    
753
    memset(res, 0, sizeof(*res));
754
    return bs->drv->bdrv_check(bs, res);
755
}
756

    
757
#define COMMIT_BUF_SECTORS 2048
758

    
759
/* commit COW file into the raw image */
760
int bdrv_commit(BlockDriverState *bs)
761
{
762
    BlockDriver *drv = bs->drv;
763
    BlockDriver *backing_drv;
764
    int64_t sector, total_sectors;
765
    int n, ro, open_flags;
766
    int ret = 0, rw_ret = 0;
767
    uint8_t *buf;
768
    char filename[1024];
769
    BlockDriverState *bs_rw, *bs_ro;
770

    
771
    if (!drv)
772
        return -ENOMEDIUM;
773
    
774
    if (!bs->backing_hd) {
775
        return -ENOTSUP;
776
    }
777

    
778
    if (bs->backing_hd->keep_read_only) {
779
        return -EACCES;
780
    }
781

    
782
    backing_drv = bs->backing_hd->drv;
783
    ro = bs->backing_hd->read_only;
784
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
785
    open_flags =  bs->backing_hd->open_flags;
786

    
787
    if (ro) {
788
        /* re-open as RW */
789
        bdrv_delete(bs->backing_hd);
790
        bs->backing_hd = NULL;
791
        bs_rw = bdrv_new("");
792
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
793
            backing_drv);
794
        if (rw_ret < 0) {
795
            bdrv_delete(bs_rw);
796
            /* try to re-open read-only */
797
            bs_ro = bdrv_new("");
798
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
799
                backing_drv);
800
            if (ret < 0) {
801
                bdrv_delete(bs_ro);
802
                /* drive not functional anymore */
803
                bs->drv = NULL;
804
                return ret;
805
            }
806
            bs->backing_hd = bs_ro;
807
            return rw_ret;
808
        }
809
        bs->backing_hd = bs_rw;
810
    }
811

    
812
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
813
    buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
814

    
815
    for (sector = 0; sector < total_sectors; sector += n) {
816
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
817

    
818
            if (bdrv_read(bs, sector, buf, n) != 0) {
819
                ret = -EIO;
820
                goto ro_cleanup;
821
            }
822

    
823
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
824
                ret = -EIO;
825
                goto ro_cleanup;
826
            }
827
        }
828
    }
829

    
830
    if (drv->bdrv_make_empty) {
831
        ret = drv->bdrv_make_empty(bs);
832
        bdrv_flush(bs);
833
    }
834

    
835
    /*
836
     * Make sure all data we wrote to the backing device is actually
837
     * stable on disk.
838
     */
839
    if (bs->backing_hd)
840
        bdrv_flush(bs->backing_hd);
841

    
842
ro_cleanup:
843
    qemu_free(buf);
844

    
845
    if (ro) {
846
        /* re-open as RO */
847
        bdrv_delete(bs->backing_hd);
848
        bs->backing_hd = NULL;
849
        bs_ro = bdrv_new("");
850
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
851
            backing_drv);
852
        if (ret < 0) {
853
            bdrv_delete(bs_ro);
854
            /* drive not functional anymore */
855
            bs->drv = NULL;
856
            return ret;
857
        }
858
        bs->backing_hd = bs_ro;
859
        bs->backing_hd->keep_read_only = 0;
860
    }
861

    
862
    return ret;
863
}
864

    
865
void bdrv_commit_all(void)
866
{
867
    BlockDriverState *bs;
868

    
869
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
870
        bdrv_commit(bs);
871
    }
872
}
873

    
874
/*
875
 * Return values:
876
 * 0        - success
877
 * -EINVAL  - backing format specified, but no file
878
 * -ENOSPC  - can't update the backing file because no space is left in the
879
 *            image file header
880
 * -ENOTSUP - format driver doesn't support changing the backing file
881
 */
882
int bdrv_change_backing_file(BlockDriverState *bs,
883
    const char *backing_file, const char *backing_fmt)
884
{
885
    BlockDriver *drv = bs->drv;
886

    
887
    if (drv->bdrv_change_backing_file != NULL) {
888
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
889
    } else {
890
        return -ENOTSUP;
891
    }
892
}
893

    
894
/* Validate a byte-granularity request of size bytes at offset.
   Returns -ENOMEDIUM when no medium is inserted, 0 without any further
   check for growable devices, -EIO when the request has a negative offset
   or does not lie entirely inside the current device length, 0 otherwise. */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t image_len;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    /* Growable devices may be accessed beyond their current end */
    if (bs->growable) {
        return 0;
    }

    image_len = bdrv_getlength(bs);

    if (offset < 0 || offset > image_len || image_len - offset < size) {
        return -EIO;
    }

    return 0;
}
915

    
916
/* Validate a sector-granularity request by converting it to bytes and
   handing it to bdrv_check_byte_request(); returns that function's result. */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    int64_t byte_offset = sector_num * BDRV_SECTOR_SIZE;
    size_t byte_count = nb_sectors * BDRV_SECTOR_SIZE;

    return bdrv_check_byte_request(bs, byte_offset, byte_count);
}
922

    
923
/* return < 0 if error. See bdrv_write() for the return codes */
924
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
925
              uint8_t *buf, int nb_sectors)
926
{
927
    BlockDriver *drv = bs->drv;
928

    
929
    if (!drv)
930
        return -ENOMEDIUM;
931
    if (bdrv_check_request(bs, sector_num, nb_sectors))
932
        return -EIO;
933

    
934
    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
935
}
936

    
937
/* Mark (dirty != 0) or clear (dirty == 0) the dirty bit of every chunk of
   BDRV_SECTORS_PER_DIRTY_CHUNK sectors touched by the sector range
   [sector_num, sector_num + nb_sectors), keeping bs->dirty_count in step
   with the number of bits that actually change. */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    int64_t last_chunk =
        (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    while (chunk <= last_chunk) {
        unsigned long word_idx = chunk / bits_per_word;
        unsigned long bit = chunk % bits_per_word;
        unsigned long mask = 1UL << bit;
        unsigned long word = bs->dirty_bitmap[word_idx];
        int was_dirty = (word & mask) != 0;

        if (dirty && !was_dirty) {
            bs->dirty_count++;
            word |= mask;
        } else if (!dirty && was_dirty) {
            bs->dirty_count--;
            word &= ~mask;
        }
        bs->dirty_bitmap[word_idx] = word;
        chunk++;
    }
}
964

    
965
/* Return < 0 if error. Important errors are:
966
  -EIO         generic I/O error (may happen for all errors)
967
  -ENOMEDIUM   No media inserted.
968
  -EINVAL      Invalid sector number or nb_sectors
969
  -EACCES      Trying to write a read-only device
970
*/
971
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
972
               const uint8_t *buf, int nb_sectors)
973
{
974
    BlockDriver *drv = bs->drv;
975
    if (!bs->drv)
976
        return -ENOMEDIUM;
977
    if (bs->read_only)
978
        return -EACCES;
979
    if (bdrv_check_request(bs, sector_num, nb_sectors))
980
        return -EIO;
981

    
982
    if (bs->dirty_bitmap) {
983
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
984
    }
985

    
986
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
987
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
988
    }
989

    
990
    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
991
}
992

    
993
/* Read count1 bytes at byte offset 'offset' into buf; neither value needs
   to be sector aligned.  The unaligned head and tail go through a
   one-sector bounce buffer, whole sectors in between are read straight
   into buf.  Returns count1 on success or the negative error of the first
   failing bdrv_read(). */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t bounce[BDRV_SECTOR_SIZE];
    uint8_t *dst = buf;
    int64_t sector = offset >> BDRV_SECTOR_BITS;
    int remaining = count1;
    int chunk, whole_sectors, rc;

    /* first read to align to sector start */
    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (chunk > remaining) {
        chunk = remaining;
    }
    if (chunk > 0) {
        rc = bdrv_read(bs, sector, bounce, 1);
        if (rc < 0) {
            return rc;
        }
        memcpy(dst, bounce + (offset & (BDRV_SECTOR_SIZE - 1)), chunk);
        remaining -= chunk;
        if (remaining == 0) {
            return count1;
        }
        sector++;
        dst += chunk;
    }

    /* read the sectors "in place" */
    whole_sectors = remaining >> BDRV_SECTOR_BITS;
    if (whole_sectors > 0) {
        rc = bdrv_read(bs, sector, dst, whole_sectors);
        if (rc < 0) {
            return rc;
        }
        sector += whole_sectors;
        chunk = whole_sectors << BDRV_SECTOR_BITS;
        dst += chunk;
        remaining -= chunk;
    }

    /* add data from the last sector */
    if (remaining > 0) {
        rc = bdrv_read(bs, sector, bounce, 1);
        if (rc < 0) {
            return rc;
        }
        memcpy(dst, bounce, remaining);
    }
    return count1;
}
1037

    
1038
/*
 * Write 'count1' bytes from 'buf' starting at byte 'offset'.
 *
 * The request does not have to be sector aligned: a partially covered first
 * or last sector is handled by read-modify-write through a one-sector
 * bounce buffer, while the whole sectors in between are written directly
 * from the caller's buffer.
 *
 * Returns 'count1' on success, or the negative value returned by
 * bdrv_read()/bdrv_write() on failure.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    /* Byte cursor: ISO C does not define arithmetic on 'void *'. */
    const uint8_t *src = buf;
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;

    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count) {
        len = count;
    }
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), src, len);
        ret = bdrv_write(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        count -= len;
        if (count == 0) {
            return count1;
        }
        sector_num++;
        src += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        ret = bdrv_write(bs, sector_num, src, nb_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        src += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(tmp_buf, src, count);
        ret = bdrv_write(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
    }
    return count1;
}
1086

    
1087
/*
1088
 * Writes to the file and ensures that no writes are reordered across this
1089
 * request (acts as a barrier)
1090
 *
1091
 * Returns 0 on success, -errno in error cases.
1092
 */
1093
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1094
    const void *buf, int count)
1095
{
1096
    int ret;
1097

    
1098
    ret = bdrv_pwrite(bs, offset, buf, count);
1099
    if (ret < 0) {
1100
        return ret;
1101
    }
1102

    
1103
    /* No flush needed for cache=writethrough, it uses O_DSYNC */
1104
    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1105
        bdrv_flush(bs);
1106
    }
1107

    
1108
    return 0;
1109
}
1110

    
1111
/*
1112
 * Writes to the file and ensures that no writes are reordered across this
1113
 * request (acts as a barrier)
1114
 *
1115
 * Returns 0 on success, -errno in error cases.
1116
 */
1117
int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
1118
    const uint8_t *buf, int nb_sectors)
1119
{
1120
    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
1121
        buf, BDRV_SECTOR_SIZE * nb_sectors);
1122
}
1123

    
1124
/**
1125
 * Truncate file to 'offset' bytes (needed only for file protocols)
1126
 */
1127
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1128
{
1129
    BlockDriver *drv = bs->drv;
1130
    int ret;
1131
    if (!drv)
1132
        return -ENOMEDIUM;
1133
    if (!drv->bdrv_truncate)
1134
        return -ENOTSUP;
1135
    if (bs->read_only)
1136
        return -EACCES;
1137
    if (bdrv_in_use(bs))
1138
        return -EBUSY;
1139
    ret = drv->bdrv_truncate(bs, offset);
1140
    if (ret == 0) {
1141
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1142
        if (bs->change_cb) {
1143
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1144
        }
1145
    }
1146
    return ret;
1147
}
1148

    
1149
/**
1150
 * Length of a file in bytes. Return < 0 if error or unknown.
1151
 */
1152
int64_t bdrv_getlength(BlockDriverState *bs)
1153
{
1154
    BlockDriver *drv = bs->drv;
1155
    if (!drv)
1156
        return -ENOMEDIUM;
1157

    
1158
    if (bs->growable || bs->removable) {
1159
        if (drv->bdrv_getlength) {
1160
            return drv->bdrv_getlength(bs);
1161
        }
1162
    }
1163
    return bs->total_sectors * BDRV_SECTOR_SIZE;
1164
}
1165

    
1166
/* return 0 as number of sectors if no device present or error */
1167
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1168
{
1169
    int64_t length;
1170
    length = bdrv_getlength(bs);
1171
    if (length < 0)
1172
        length = 0;
1173
    else
1174
        length = length >> BDRV_SECTOR_BITS;
1175
    *nb_sectors_ptr = length;
1176
}
1177

    
1178
/*
 * One 16-byte entry of the MSDOS partition table, as read from the first
 * sector of the disk. Packed so that the struct can be overlaid on the raw
 * sector bytes; the 32-bit fields are converted with le32_to_cpu() by the
 * reader.
 */
struct partition {
    uint8_t boot_ind;           /* 0x80 - active */
    uint8_t head;               /* starting head */
    uint8_t sector;             /* starting sector */
    uint8_t cyl;                /* starting cylinder */
    uint8_t sys_ind;            /* What partition type */
    uint8_t end_head;           /* end head */
    uint8_t end_sector;         /* end sector */
    uint8_t end_cyl;            /* end cylinder */
    uint32_t start_sect;        /* starting sector counting from 0 */
    uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
1190

    
1191
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1192
static int guess_disk_lchs(BlockDriverState *bs,
1193
                           int *pcylinders, int *pheads, int *psectors)
1194
{
1195
    uint8_t buf[BDRV_SECTOR_SIZE];
1196
    int ret, i, heads, sectors, cylinders;
1197
    struct partition *p;
1198
    uint32_t nr_sects;
1199
    uint64_t nb_sectors;
1200

    
1201
    bdrv_get_geometry(bs, &nb_sectors);
1202

    
1203
    ret = bdrv_read(bs, 0, buf, 1);
1204
    if (ret < 0)
1205
        return -1;
1206
    /* test msdos magic */
1207
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1208
        return -1;
1209
    for(i = 0; i < 4; i++) {
1210
        p = ((struct partition *)(buf + 0x1be)) + i;
1211
        nr_sects = le32_to_cpu(p->nr_sects);
1212
        if (nr_sects && p->end_head) {
1213
            /* We make the assumption that the partition terminates on
1214
               a cylinder boundary */
1215
            heads = p->end_head + 1;
1216
            sectors = p->end_sector & 63;
1217
            if (sectors == 0)
1218
                continue;
1219
            cylinders = nb_sectors / (heads * sectors);
1220
            if (cylinders < 1 || cylinders > 16383)
1221
                continue;
1222
            *pheads = heads;
1223
            *psectors = sectors;
1224
            *pcylinders = cylinders;
1225
#if 0
1226
            printf("guessed geometry: LCHS=%d %d %d\n",
1227
                   cylinders, heads, sectors);
1228
#endif
1229
            return 0;
1230
        }
1231
    }
1232
    return -1;
1233
}
1234

    
1235
/*
 * Determine the CHS geometry to present for 'bs'.
 *
 * Order of preference:
 *  1. an existing geometry hint (cylinders != 0), returned unchanged;
 *  2. the geometry guessed from the partition table, when it uses at most
 *     16 heads;
 *  3. a standard 16 heads / 63 sectors geometry with the cylinder count
 *     clamped to [2, 16383].
 *
 * In cases 2 and 3 the result is stored back as the geometry hint, and an
 * automatic translation hint may be replaced (NONE for case 2; LARGE or LBA
 * for case 3 when the partition table showed more than 16 heads).
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation;
    int lba_detected = 0;
    int use_guessed = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);

    /* if a geometry hint is available, use it */
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        return;
    }

    if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
        if (heads > 16) {
            /* if heads > 16, it means that a BIOS LBA
               translation was active, so the default
               hardware geometry is OK */
            lba_detected = 1;
        } else {
            use_guessed = 1;
        }
    }

    if (use_guessed) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        /* disable any translation to be in sync with
           the logical geometry */
        if (translation == BIOS_ATA_TRANSLATION_AUTO) {
            bdrv_set_translation_hint(bs,
                                      BIOS_ATA_TRANSLATION_NONE);
        }
    } else {
        /* if no geometry, use a standard physical disk geometry */
        cylinders = nb_sectors / (16 * 63);

        if (cylinders > 16383) {
            cylinders = 16383;
        } else if (cylinders < 2) {
            cylinders = 2;
        }
        *pcyls = cylinders;
        *pheads = 16;
        *psecs = 63;
        if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
            if ((*pcyls * *pheads) <= 131072) {
                bdrv_set_translation_hint(bs,
                                          BIOS_ATA_TRANSLATION_LARGE);
            } else {
                bdrv_set_translation_hint(bs,
                                          BIOS_ATA_TRANSLATION_LBA);
            }
        }
    }

    bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
}
1293

    
1294
/* Record a cylinders/heads/sectors geometry hint on 'bs'. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->secs = secs;
    bs->heads = heads;
    bs->cyls = cyls;
}
1301

    
1302
/* Record the translation hint (a BIOS_ATA_TRANSLATION_* value) on 'bs'. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1306

    
1307
/* Copy the geometry hint stored on 'bs' into the three output pointers. */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *psecs = bs->secs;
    *pheads = bs->heads;
    *pcyls = bs->cyls;
}
1314

    
1315
/* Recognize floppy formats */
1316
typedef struct FDFormat {
1317
    FDriveType drive;
1318
    uint8_t last_sect;
1319
    uint8_t max_track;
1320
    uint8_t max_head;
1321
} FDFormat;
1322

    
1323
static const FDFormat fd_formats[] = {
1324
    /* First entry is default format */
1325
    /* 1.44 MB 3"1/2 floppy disks */
1326
    { FDRIVE_DRV_144, 18, 80, 1, },
1327
    { FDRIVE_DRV_144, 20, 80, 1, },
1328
    { FDRIVE_DRV_144, 21, 80, 1, },
1329
    { FDRIVE_DRV_144, 21, 82, 1, },
1330
    { FDRIVE_DRV_144, 21, 83, 1, },
1331
    { FDRIVE_DRV_144, 22, 80, 1, },
1332
    { FDRIVE_DRV_144, 23, 80, 1, },
1333
    { FDRIVE_DRV_144, 24, 80, 1, },
1334
    /* 2.88 MB 3"1/2 floppy disks */
1335
    { FDRIVE_DRV_288, 36, 80, 1, },
1336
    { FDRIVE_DRV_288, 39, 80, 1, },
1337
    { FDRIVE_DRV_288, 40, 80, 1, },
1338
    { FDRIVE_DRV_288, 44, 80, 1, },
1339
    { FDRIVE_DRV_288, 48, 80, 1, },
1340
    /* 720 kB 3"1/2 floppy disks */
1341
    { FDRIVE_DRV_144,  9, 80, 1, },
1342
    { FDRIVE_DRV_144, 10, 80, 1, },
1343
    { FDRIVE_DRV_144, 10, 82, 1, },
1344
    { FDRIVE_DRV_144, 10, 83, 1, },
1345
    { FDRIVE_DRV_144, 13, 80, 1, },
1346
    { FDRIVE_DRV_144, 14, 80, 1, },
1347
    /* 1.2 MB 5"1/4 floppy disks */
1348
    { FDRIVE_DRV_120, 15, 80, 1, },
1349
    { FDRIVE_DRV_120, 18, 80, 1, },
1350
    { FDRIVE_DRV_120, 18, 82, 1, },
1351
    { FDRIVE_DRV_120, 18, 83, 1, },
1352
    { FDRIVE_DRV_120, 20, 80, 1, },
1353
    /* 720 kB 5"1/4 floppy disks */
1354
    { FDRIVE_DRV_120,  9, 80, 1, },
1355
    { FDRIVE_DRV_120, 11, 80, 1, },
1356
    /* 360 kB 5"1/4 floppy disks */
1357
    { FDRIVE_DRV_120,  9, 40, 1, },
1358
    { FDRIVE_DRV_120,  9, 40, 0, },
1359
    { FDRIVE_DRV_120, 10, 41, 1, },
1360
    { FDRIVE_DRV_120, 10, 42, 1, },
1361
    /* 320 kB 5"1/4 floppy disks */
1362
    { FDRIVE_DRV_120,  8, 40, 1, },
1363
    { FDRIVE_DRV_120,  8, 40, 0, },
1364
    /* 360 kB must match 5"1/4 better than 3"1/2... */
1365
    { FDRIVE_DRV_144,  9, 80, 0, },
1366
    /* end */
1367
    { FDRIVE_DRV_NONE, -1, -1, 0, },
1368
};
1369

    
1370
/*
 * Determine the floppy geometry for 'bs'.
 *
 * If the stored geometry hint is fully specified (all three values are
 * non-zero) it is returned as is. Otherwise fd_formats[] is searched for an
 * entry whose drive type is acceptable ('drive_in' matches, or 'drive_in'
 * is FDRIVE_DRV_NONE) and whose total sector count equals the image size.
 * Without an exact size match, the first entry with an acceptable drive
 * type is used.
 *
 * NOTE(review): '*drive' is only written when the table is consulted, not
 * for a user-defined geometry - confirm callers initialise it.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
        return;
    }

    bdrv_get_geometry(bs, &nb_sectors);
    match = -1;
    first_match = -1;
    for (i = 0; fd_formats[i].drive != FDRIVE_DRV_NONE; i++) {
        parse = &fd_formats[i];
        if (drive_in != parse->drive && drive_in != FDRIVE_DRV_NONE) {
            continue;
        }
        size = (parse->max_head + 1) * parse->max_track *
            parse->last_sect;
        if (nb_sectors == size) {
            match = i;
            break;
        }
        if (first_match == -1) {
            first_match = i;
        }
    }

    if (match == -1) {
        /*
         * NOTE(review): with no acceptable drive type at all, entry 1 is
         * used although the table comment calls the first entry (index 0)
         * the default - confirm which is intended.
         */
        match = (first_match == -1) ? 1 : first_match;
    }
    parse = &fd_formats[match];

    *nb_heads = parse->max_head + 1;
    *max_track = parse->max_track;
    *last_sect = parse->last_sect;
    *drive = parse->drive;
}
1417

    
1418
/* Return the translation hint stored on 'bs'. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    int translation = bs->translation;

    return translation;
}
1422

    
1423
/* Set the actions to take on read errors and on write errors for 'bs'. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
1429

    
1430
/*
 * Return the configured error action: the read-error action when 'is_read'
 * is non-zero, the write-error action otherwise.
 */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
1434

    
1435
/*
 * Mark 'bs' as removable or not. If it becomes removable while it is the
 * cached snapshot device (bs_snapshots), that cache is cleared.
 */
void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;

    if (!removable) {
        return;
    }
    if (bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
1442

    
1443
/* Return the 'removable' flag of 'bs'. */
int bdrv_is_removable(BlockDriverState *bs)
{
    int removable = bs->removable;

    return removable;
}
1447

    
1448
/* Return the 'read_only' flag of 'bs'. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    int read_only = bs->read_only;

    return read_only;
}
1452

    
1453
/* Return the 'sg' flag of 'bs'. */
int bdrv_is_sg(BlockDriverState *bs)
{
    int sg = bs->sg;

    return sg;
}
1457

    
1458
/* Return the 'enable_write_cache' flag of 'bs' (getter; changes nothing). */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    int enabled = bs->enable_write_cache;

    return enabled;
}
1462

    
1463
/* XXX: no longer used */
1464
void bdrv_set_change_cb(BlockDriverState *bs,
1465
                        void (*change_cb)(void *opaque, int reason),
1466
                        void *opaque)
1467
{
1468
    bs->change_cb = change_cb;
1469
    bs->change_opaque = opaque;
1470
}
1471

    
1472
/*
 * Return 1 if the backing image of 'bs' is encrypted; otherwise return the
 * 'encrypted' flag of 'bs' itself.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing && backing->encrypted) {
        return 1;
    }
    return bs->encrypted;
}
1478

    
1479
/*
 * Return non-zero if a key still has to be supplied: either the backing
 * image or 'bs' itself is encrypted and has no valid key yet.
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing && backing->encrypted && !backing->valid_key) {
        return 1;
    }

    return (bs->encrypted && !bs->valid_key);
}
1487

    
1488
/*
 * Set the encryption key of 'bs'.
 *
 * An encrypted backing image receives the key first; a failure there is
 * returned immediately, and if 'bs' itself is not encrypted the call then
 * ends with 0. Otherwise returns -EINVAL when 'bs' is not encrypted,
 * -ENOMEDIUM when there is no driver or no bdrv_set_key callback, else the
 * driver's result.
 *
 * The first time a key is accepted, the media is flagged as changed and the
 * change callback (if any) is invoked with CHANGE_MEDIA.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;

    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bs->media_changed = 1;
        if (bs->change_cb) {
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
        }
    }
    return ret;
}
1515

    
1516
/*
 * Copy the driver's format name into 'buf' (at most 'buf_size' bytes, via
 * pstrcpy). Without a driver, 'buf' is set to the empty string.
 */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (bs->drv) {
        pstrcpy(buf, buf_size, bs->drv->format_name);
        return;
    }

    buf[0] = '\0';
}
1524

    
1525
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1526
                         void *opaque)
1527
{
1528
    BlockDriver *drv;
1529

    
1530
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1531
        it(opaque, drv->format_name);
1532
    }
1533
}
1534

    
1535
BlockDriverState *bdrv_find(const char *name)
1536
{
1537
    BlockDriverState *bs;
1538

    
1539
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1540
        if (!strcmp(name, bs->device_name)) {
1541
            return bs;
1542
        }
1543
    }
1544
    return NULL;
1545
}
1546

    
1547
/*
 * Iterate over bdrv_states: pass NULL to get the first entry, or an entry
 * to get the one following it.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (bs) {
        return QTAILQ_NEXT(bs, list);
    }

    return QTAILQ_FIRST(&bdrv_states);
}
1554

    
1555
/* Call it(opaque, bs) once for every entry of bdrv_states. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    QTAILQ_FOREACH(cur, &bdrv_states, list) {
        it(opaque, cur);
    }
}
1563

    
1564
/* Return the device name stored in 'bs' (not a copy). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    const char *name = bs->device_name;

    return name;
}
1568

    
1569
/*
 * Flush 'bs' through its driver.
 *
 * Returns 0 without doing anything when BDRV_O_NO_FLUSH is set in the open
 * flags, or when there is no driver or no bdrv_flush callback; otherwise
 * returns the driver's result.
 */
int bdrv_flush(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (!drv || !drv->bdrv_flush) {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }

    return drv->bdrv_flush(bs);
}
1591

    
1592
void bdrv_flush_all(void)
1593
{
1594
    BlockDriverState *bs;
1595

    
1596
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1597
        if (bs->drv && !bdrv_is_read_only(bs) &&
1598
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1599
            bdrv_flush(bs);
1600
        }
1601
    }
1602
}
1603

    
1604
/*
 * Return the driver's bdrv_has_zero_init() answer, or 1 when the driver
 * does not implement that callback. 'bs' must have a driver (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }

    return bs->drv->bdrv_has_zero_init(bs);
}
1614

    
1615
/*
 * Pass a discard request for 'nb_sectors' sectors starting at 'sector_num'
 * to the driver.
 *
 * Returns -ENOMEDIUM without a driver, 0 when the driver has no
 * bdrv_discard callback, else the driver's result.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_discard) {
        return drv->bdrv_discard(bs, sector_num, nb_sectors);
    }

    return 0;
}
1625

    
1626
/*
1627
 * Returns true iff the specified sector is present in the disk image. Drivers
1628
 * not implementing the functionality are assumed to not support backing files,
1629
 * hence all their sectors are reported as allocated.
1630
 *
1631
 * 'pnum' is set to the number of sectors (including and immediately following
1632
 * the specified sector) that are known to be in the same
1633
 * allocated/unallocated state.
1634
 *
1635
 * 'nb_sectors' is the max value 'pnum' should be set to.
1636
 */
1637
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1638
        int *pnum)
1639
{
1640
    int64_t n;
1641
    if (!bs->drv->bdrv_is_allocated) {
1642
        if (sector_num >= bs->total_sectors) {
1643
            *pnum = 0;
1644
            return 0;
1645
        }
1646
        n = bs->total_sectors - sector_num;
1647
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1648
        return 1;
1649
    }
1650
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1651
}
1652

    
1653
void bdrv_mon_event(const BlockDriverState *bdrv,
1654
                    BlockMonEventAction action, int is_read)
1655
{
1656
    QObject *data;
1657
    const char *action_str;
1658

    
1659
    switch (action) {
1660
    case BDRV_ACTION_REPORT:
1661
        action_str = "report";
1662
        break;
1663
    case BDRV_ACTION_IGNORE:
1664
        action_str = "ignore";
1665
        break;
1666
    case BDRV_ACTION_STOP:
1667
        action_str = "stop";
1668
        break;
1669
    default:
1670
        abort();
1671
    }
1672

    
1673
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1674
                              bdrv->device_name,
1675
                              action_str,
1676
                              is_read ? "read" : "write");
1677
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1678

    
1679
    qobject_decref(data);
1680
}
1681

    
1682
/*
 * qlist_iter() callback: print one device entry (a dictionary shaped like
 * the ones built by bdrv_info()) on the monitor passed as 'opaque'.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    Monitor *mon = opaque;
    QDict *bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_bool(bs_dict, "removable"));

    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (!qdict_haskey(bs_dict, "inserted")) {
        monitor_printf(mon, " [not inserted]");
    } else {
        QDict *inserted = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(inserted, "file"));
        if (qdict_haskey(inserted, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon,
                                   qdict_get_str(inserted, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(inserted, "ro"),
                            qdict_get_str(inserted, "drv"),
                            qdict_get_bool(inserted, "encrypted"));
    }

    monitor_printf(mon, "\n");
}
1716

    
1717
/* Print every entry of the list 'data' on 'mon' via bdrv_print_dict(). */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    QList *entries = qobject_to_qlist(data);

    qlist_iter(entries, bdrv_print_dict, mon);
}
1721

    
1722
/*
 * Build a list with one dictionary per device in bdrv_states and store it
 * in '*ret_data'.
 *
 * Each dictionary has the keys 'device', 'type', 'removable' and 'locked'.
 * Devices with a driver additionally get an 'inserted' dictionary with
 * 'file', 'ro', 'drv', 'encrypted' and, when a backing file name is set,
 * 'backing_file'.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name, bs->removable,
                                    bs->locked);

        if (bs->drv) {
            QDict *bs_dict = qobject_to_qdict(bs_obj);
            QObject *inserted;

            inserted = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                          "'encrypted': %i }",
                                          bs->filename, bs->read_only,
                                          bs->drv->format_name,
                                          bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                qdict_put(qobject_to_qdict(inserted), "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", inserted);
        }

        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
1759

    
1760
/*
 * qlist_iter() callback: print the device name and the read/write byte and
 * operation counters found in the entry's 'stats' dictionary on the monitor
 * passed as 'opaque'.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    Monitor *mon = opaque;
    QDict *entry = qobject_to_qdict(data);
    QDict *stats;

    monitor_printf(mon, "%s:", qdict_get_str(entry, "device"));

    stats = qobject_to_qdict(qdict_get(entry, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        "\n",
                        qdict_get_int(stats, "rd_bytes"),
                        qdict_get_int(stats, "wr_bytes"),
                        qdict_get_int(stats, "rd_operations"),
                        qdict_get_int(stats, "wr_operations"));
}
1779

    
1780
/* Print every entry of the list 'data' on 'mon' via bdrv_stats_iter(). */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    QList *entries = qobject_to_qlist(data);

    qlist_iter(entries, bdrv_stats_iter, mon);
}
1784

    
1785
/*
 * Build the statistics dictionary for 'bs': a 'stats' dictionary with the
 * byte/operation counters and 'wr_highest_offset' (the highest written
 * sector converted to bytes), a 'device' entry when the device name is not
 * empty, and - recursively - a 'parent' entry describing bs->file when that
 * is set.
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *res;
    QDict *dict;

    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64
                             "} }",
                             bs->rd_bytes, bs->wr_bytes,
                             bs->rd_ops, bs->wr_ops,
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE);
    dict = qobject_to_qdict(res);

    if (bs->device_name[0] != '\0') {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        qdict_put_obj(dict, "parent", bdrv_info_stats_bs(bs->file));
    }

    return res;
}
1814

    
1815
/*
 * Collect the statistics dictionaries of all devices in bdrv_states into a
 * list and store it in '*ret_data'.
 */
void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QList *devices = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        qlist_append_obj(devices, bdrv_info_stats_bs(bs));
    }

    *ret_data = QOBJECT(devices);
}
1830

    
1831
/*
 * Return the file name of the encrypted image associated with 'bs': the
 * backing file name if the backing image is encrypted, else the name of
 * 'bs' itself if it is encrypted, else NULL.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }

    return NULL;
}
1840

    
1841
/*
 * Copy the backing file name of 'bs' into 'filename' (at most
 * 'filename_size' bytes, via pstrcpy).
 *
 * NOTE(review): other code in this file indexes bs->backing_file like an
 * array; if it is an embedded array the NULL test below can never fire -
 * confirm against the struct declaration.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    if (bs->backing_file) {
        pstrcpy(filename, filename_size, bs->backing_file);
        return;
    }

    pstrcpy(filename, filename_size, "");
}
1850

    
1851
/*
 * Write 'nb_sectors' sectors at 'sector_num' through the driver's
 * bdrv_write_compressed callback.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP when the driver lacks the
 * callback and -EIO when bdrv_check_request() rejects the range; otherwise
 * the driver's result. The range is marked in the dirty bitmap (when one
 * exists) before the driver is called.
 */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
1868

    
1869
/*
 * Fill '*bdi' with driver information. The structure is zeroed before the
 * driver callback runs.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP when the driver has no
 * bdrv_get_info callback (in both cases '*bdi' is left untouched), else the
 * driver's result.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
1879

    
1880
/*
 * Save 'size' bytes of VM state from 'buf' at position 'pos'.
 *
 * Uses the driver's bdrv_save_vmstate callback if present; otherwise the
 * request is forwarded to bs->file. Returns -ENOMEDIUM without a driver and
 * -ENOTSUP when neither a callback nor bs->file exists.
 */
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_save_vmstate) {
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    }
    if (bs->file) {
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    }

    return -ENOTSUP;
}
1892

    
1893
/*
 * Load 'size' bytes of VM state from position 'pos' into 'buf'.
 *
 * Uses the driver's bdrv_load_vmstate callback if present; otherwise the
 * request is forwarded to bs->file. Returns -ENOMEDIUM without a driver and
 * -ENOTSUP when neither a callback nor bs->file exists.
 */
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_load_vmstate) {
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    }
    if (bs->file) {
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    }

    return -ENOTSUP;
}
1905

    
1906
/*
 * Forward a debug event to the driver's bdrv_debug_event callback.
 * Does nothing when 'bs' has no driver or the driver has no such callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    /*
     * Plain call: ISO C does not allow 'return <expression>;' in a function
     * returning void.
     */
    drv->bdrv_debug_event(bs, event);
}
1917

    
1918
/**************************************************************/
1919
/* handling of snapshots */
1920

    
1921
/*
 * Return 1 if snapshots can be created on 'bs', 0 otherwise.
 *
 * Devices without a driver, removable devices and read-only devices never
 * qualify. If the driver has no bdrv_snapshot_create callback, the answer
 * is taken from bs->file when that exists.
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (drv->bdrv_snapshot_create) {
        return 1;
    }

    if (bs->file != NULL) {
        return bdrv_can_snapshot(bs->file);
    }

    return 0;
}
1937

    
1938
/* Return 1 if 'bs' was opened with BDRV_O_SNAPSHOT, 0 otherwise. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) ? 1 : 0;
}
1942

    
1943
BlockDriverState *bdrv_snapshots(void)
1944
{
1945
    BlockDriverState *bs;
1946

    
1947
    if (bs_snapshots) {
1948
        return bs_snapshots;
1949
    }
1950

    
1951
    bs = NULL;
1952
    while ((bs = bdrv_next(bs))) {
1953
        if (bdrv_can_snapshot(bs)) {
1954
            bs_snapshots = bs;
1955
            return bs;
1956
        }
1957
    }
1958
    return NULL;
1959
}
1960

    
1961
int bdrv_snapshot_create(BlockDriverState *bs,
1962
                         QEMUSnapshotInfo *sn_info)
1963
{
1964
    BlockDriver *drv = bs->drv;
1965
    if (!drv)
1966
        return -ENOMEDIUM;
1967
    if (drv->bdrv_snapshot_create)
1968
        return drv->bdrv_snapshot_create(bs, sn_info);
1969
    if (bs->file)
1970
        return bdrv_snapshot_create(bs->file, sn_info);
1971
    return -ENOTSUP;
1972
}
1973

    
1974
/*
 * Revert @bs to the snapshot identified by @snapshot_id.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP when neither the driver
 * nor an underlying bs->file can handle the request.
 *
 * Without a driver hook the driver is closed, the operation is applied to
 * bs->file, and the driver is opened again with the original open flags.
 * If that reopen fails, bs->file is deleted, bs->drv is cleared and the
 * reopen error is returned; otherwise the result of the operation on
 * bs->file is returned.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int goto_ret, reopen_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }
    if (!bs->file) {
        return -ENOTSUP;
    }

    /* The order matters: close, switch the underlying file, reopen. */
    drv->bdrv_close(bs);
    goto_ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    reopen_ret = drv->bdrv_open(bs, bs->open_flags);
    if (reopen_ret < 0) {
        bdrv_delete(bs->file);
        bs->drv = NULL;
        return reopen_ret;
    }

    return goto_ret;
}
1999

    
2000
/*
 * Delete the snapshot identified by @snapshot_id.
 *
 * Returns -ENOMEDIUM without a driver.  Uses the driver's
 * bdrv_snapshot_delete hook when present, otherwise retries on bs->file,
 * and returns -ENOTSUP when neither is available.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_delete) {
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    }

    return bs->file ? bdrv_snapshot_delete(bs->file, snapshot_id) : -ENOTSUP;
}
2011

    
2012
int bdrv_snapshot_list(BlockDriverState *bs,
2013
                       QEMUSnapshotInfo **psn_info)
2014
{
2015
    BlockDriver *drv = bs->drv;
2016
    if (!drv)
2017
        return -ENOMEDIUM;
2018
    if (drv->bdrv_snapshot_list)
2019
        return drv->bdrv_snapshot_list(bs, psn_info);
2020
    if (bs->file)
2021
        return bdrv_snapshot_list(bs->file, psn_info);
2022
    return -ENOTSUP;
2023
}
2024

    
2025
/*
 * Ask the driver to load the snapshot named @snapshot_name through its
 * bdrv_snapshot_load_tmp hook.
 *
 * Returns -ENOMEDIUM without a driver, -EINVAL when @bs is not read-only
 * and -ENOTSUP when the driver has no such hook.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (!drv->bdrv_snapshot_load_tmp) {
        return -ENOTSUP;
    }

    return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
2040

    
2041
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as a plain integer.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * is shown while the scaled value is below 10, otherwise the value is
 * rounded to the nearest integer.  T is the largest unit, so very large
 * values simply get a bigger number in front of it.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Step up to the first unit that keeps the value below 1000. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 suffixes[idx]);
    }

    return buf;
}
2070

    
2071
/*
 * Format one line of the snapshot table into @buf (at most @buf_size
 * bytes) and return @buf.
 *
 * With @sn == NULL the column header line is produced.  Otherwise the
 * line holds the snapshot id, its name, the VM state size in
 * human-readable form, the local date taken from sn->date_sec and the VM
 * clock derived from sn->vm_clock_nsec as hh:mm:ss.mmm.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_buf[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    /* Date column: wall-clock time in the local time zone. */
    ti = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&ti);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&ti, &tm);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    /* VM clock column: nanoseconds split into h:m:s and milliseconds. */
    secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_buf, sizeof(clock_buf),
             "%02d:%02d:%02d.%03d",
             (int)(secs / 3600),
             (int)((secs / 60) % 60),
             (int)(secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    get_human_readable_size(size_buf, sizeof(size_buf), sn->vm_state_size);

    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             size_buf,
             date_buf,
             clock_buf);
    return buf;
}
2113

    
2114

    
2115
/**************************************************************/
2116
/* async I/Os */
2117

    
2118
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2119
                                 QEMUIOVector *qiov, int nb_sectors,
2120
                                 BlockDriverCompletionFunc *cb, void *opaque)
2121
{
2122
    BlockDriver *drv = bs->drv;
2123
    BlockDriverAIOCB *ret;
2124

    
2125
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2126

    
2127
    if (!drv)
2128
        return NULL;
2129
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2130
        return NULL;
2131

    
2132
    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2133
                              cb, opaque);
2134

    
2135
    if (ret) {
2136
        /* Update stats even though technically transfer has not happened. */
2137
        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2138
        bs->rd_ops ++;
2139
    }
2140

    
2141
    return ret;
2142
}
2143

    
2144
typedef struct BlockCompleteData {
2145
    BlockDriverCompletionFunc *cb;
2146
    void *opaque;
2147
    BlockDriverState *bs;
2148
    int64_t sector_num;
2149
    int nb_sectors;
2150
} BlockCompleteData;
2151

    
2152
static void block_complete_cb(void *opaque, int ret)
2153
{
2154
    BlockCompleteData *b = opaque;
2155

    
2156
    if (b->bs->dirty_bitmap) {
2157
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2158
    }
2159
    b->cb(b->opaque, ret);
2160
    qemu_free(b);
2161
}
2162

    
2163
static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2164
                                             int64_t sector_num,
2165
                                             int nb_sectors,
2166
                                             BlockDriverCompletionFunc *cb,
2167
                                             void *opaque)
2168
{
2169
    BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
2170

    
2171
    blkdata->bs = bs;
2172
    blkdata->cb = cb;
2173
    blkdata->opaque = opaque;
2174
    blkdata->sector_num = sector_num;
2175
    blkdata->nb_sectors = nb_sectors;
2176

    
2177
    return blkdata;
2178
}
2179

    
2180
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2181
                                  QEMUIOVector *qiov, int nb_sectors,
2182
                                  BlockDriverCompletionFunc *cb, void *opaque)
2183
{
2184
    BlockDriver *drv = bs->drv;
2185
    BlockDriverAIOCB *ret;
2186
    BlockCompleteData *blk_cb_data;
2187

    
2188
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2189

    
2190
    if (!drv)
2191
        return NULL;
2192
    if (bs->read_only)
2193
        return NULL;
2194
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2195
        return NULL;
2196

    
2197
    if (bs->dirty_bitmap) {
2198
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2199
                                         opaque);
2200
        cb = &block_complete_cb;
2201
        opaque = blk_cb_data;
2202
    }
2203

    
2204
    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2205
                               cb, opaque);
2206

    
2207
    if (ret) {
2208
        /* Update stats even though technically transfer has not happened. */
2209
        bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2210
        bs->wr_ops ++;
2211
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2212
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2213
        }
2214
    }
2215

    
2216
    return ret;
2217
}
2218

    
2219

    
2220
typedef struct MultiwriteCB {
2221
    int error;
2222
    int num_requests;
2223
    int num_callbacks;
2224
    struct {
2225
        BlockDriverCompletionFunc *cb;
2226
        void *opaque;
2227
        QEMUIOVector *free_qiov;
2228
        void *free_buf;
2229
    } callbacks[];
2230
} MultiwriteCB;
2231

    
2232
/*
 * Invoke every caller callback recorded in @mcb with the overall error
 * code, then release the merge by-products (merged qiov and zero-fill
 * buffer) attached to that entry.
 */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int idx;

    for (idx = 0; idx < mcb->num_callbacks; idx++) {
        QEMUIOVector *merged;

        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);

        merged = mcb->callbacks[idx].free_qiov;
        if (merged) {
            qemu_iovec_destroy(merged);
        }
        qemu_free(merged);
        qemu_vfree(mcb->callbacks[idx].free_buf);
    }
}
2245

    
2246
static void multiwrite_cb(void *opaque, int ret)
2247
{
2248
    MultiwriteCB *mcb = opaque;
2249

    
2250
    trace_multiwrite_cb(mcb, ret);
2251

    
2252
    if (ret < 0 && !mcb->error) {
2253
        mcb->error = ret;
2254
    }
2255

    
2256
    mcb->num_requests--;
2257
    if (mcb->num_requests == 0) {
2258
        multiwrite_user_cb(mcb);
2259
        qemu_free(mcb);
2260
    }
2261
}
2262

    
2263
static int multiwrite_req_compare(const void *a, const void *b)
2264
{
2265
    const BlockRequest *req1 = a, *req2 = b;
2266

    
2267
    /*
2268
     * Note that we can't simply subtract req2->sector from req1->sector
2269
     * here as that could overflow the return value.
2270
     */
2271
    if (req1->sector > req2->sector) {
2272
        return 1;
2273
    } else if (req1->sector < req2->sector) {
2274
        return -1;
2275
    } else {
2276
        return 0;
2277
    }
2278
}
2279

    
2280
/*
2281
 * Takes a bunch of requests and tries to merge them. Returns the number of
2282
 * requests that remain after merging.
2283
 */
2284
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2285
    int num_reqs, MultiwriteCB *mcb)
2286
{
2287
    int i, outidx;
2288

    
2289
    // Sort requests by start sector
2290
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2291

    
2292
    // Check if adjacent requests touch the same clusters. If so, combine them,
2293
    // filling up gaps with zero sectors.
2294
    outidx = 0;
2295
    for (i = 1; i < num_reqs; i++) {
2296
        int merge = 0;
2297
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2298

    
2299
        // This handles the cases that are valid for all block drivers, namely
2300
        // exactly sequential writes and overlapping writes.
2301
        if (reqs[i].sector <= oldreq_last) {
2302
            merge = 1;
2303
        }
2304

    
2305
        // The block driver may decide that it makes sense to combine requests
2306
        // even if there is a gap of some sectors between them. In this case,
2307
        // the gap is filled with zeros (therefore only applicable for yet
2308
        // unused space in format like qcow2).
2309
        if (!merge && bs->drv->bdrv_merge_requests) {
2310
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2311
        }
2312

    
2313
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2314
            merge = 0;
2315
        }
2316

    
2317
        if (merge) {
2318
            size_t size;
2319
            QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
2320
            qemu_iovec_init(qiov,
2321
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2322

    
2323
            // Add the first request to the merged one. If the requests are
2324
            // overlapping, drop the last sectors of the first request.
2325
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2326
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2327

    
2328
            // We might need to add some zeros between the two requests
2329
            if (reqs[i].sector > oldreq_last) {
2330
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2331
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2332
                memset(buf, 0, zero_bytes);
2333
                qemu_iovec_add(qiov, buf, zero_bytes);
2334
                mcb->callbacks[i].free_buf = buf;
2335
            }
2336

    
2337
            // Add the second request
2338
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2339

    
2340
            reqs[outidx].nb_sectors = qiov->size >> 9;
2341
            reqs[outidx].qiov = qiov;
2342

    
2343
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2344
        } else {
2345
            outidx++;
2346
            reqs[outidx].sector     = reqs[i].sector;
2347
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2348
            reqs[outidx].qiov       = reqs[i].qiov;
2349
        }
2350
    }
2351

    
2352
    return outidx + 1;
2353
}
2354

    
2355
/*
2356
 * Submit multiple AIO write requests at once.
2357
 *
2358
 * On success, the function returns 0 and all requests in the reqs array have
2359
 * been submitted. In error case this function returns -1, and any of the
2360
 * requests may or may not be submitted yet. In particular, this means that the
2361
 * callback will be called for some of the requests, for others it won't. The
2362
 * caller must check the error field of the BlockRequest to wait for the right
2363
 * callbacks (if error != 0, no callback will be called).
2364
 *
2365
 * The implementation may modify the contents of the reqs array, e.g. to merge
2366
 * requests. However, the fields opaque and error are left unmodified as they
2367
 * are used to signal failure for a single request to the caller.
2368
 */
2369
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2370
{
2371
    BlockDriverAIOCB *acb;
2372
    MultiwriteCB *mcb;
2373
    int i;
2374

    
2375
    /* don't submit writes if we don't have a medium */
2376
    if (bs->drv == NULL) {
2377
        for (i = 0; i < num_reqs; i++) {
2378
            reqs[i].error = -ENOMEDIUM;
2379
        }
2380
        return -1;
2381
    }
2382

    
2383
    if (num_reqs == 0) {
2384
        return 0;
2385
    }
2386

    
2387
    // Create MultiwriteCB structure
2388
    mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2389
    mcb->num_requests = 0;
2390
    mcb->num_callbacks = num_reqs;
2391

    
2392
    for (i = 0; i < num_reqs; i++) {
2393
        mcb->callbacks[i].cb = reqs[i].cb;
2394
        mcb->callbacks[i].opaque = reqs[i].opaque;
2395
    }
2396

    
2397
    // Check for mergable requests
2398
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2399

    
2400
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2401

    
2402
    /*
2403
     * Run the aio requests. As soon as one request can't be submitted
2404
     * successfully, fail all requests that are not yet submitted (we must
2405
     * return failure for all requests anyway)
2406
     *
2407
     * num_requests cannot be set to the right value immediately: If
2408
     * bdrv_aio_writev fails for some request, num_requests would be too high
2409
     * and therefore multiwrite_cb() would never recognize the multiwrite
2410
     * request as completed. We also cannot use the loop variable i to set it
2411
     * when the first request fails because the callback may already have been
2412
     * called for previously submitted requests. Thus, num_requests must be
2413
     * incremented for each request that is submitted.
2414
     *
2415
     * The problem that callbacks may be called early also means that we need
2416
     * to take care that num_requests doesn't become 0 before all requests are
2417
     * submitted - multiwrite_cb() would consider the multiwrite request
2418
     * completed. A dummy request that is "completed" by a manual call to
2419
     * multiwrite_cb() takes care of this.
2420
     */
2421
    mcb->num_requests = 1;
2422

    
2423
    // Run the aio requests
2424
    for (i = 0; i < num_reqs; i++) {
2425
        mcb->num_requests++;
2426
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2427
            reqs[i].nb_sectors, multiwrite_cb, mcb);
2428

    
2429
        if (acb == NULL) {
2430
            // We can only fail the whole thing if no request has been
2431
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2432
            // complete and report the error in the callback.
2433
            if (i == 0) {
2434
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2435
                goto fail;
2436
            } else {
2437
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2438
                multiwrite_cb(mcb, -EIO);
2439
                break;
2440
            }
2441
        }
2442
    }
2443

    
2444
    /* Complete the dummy request */
2445
    multiwrite_cb(mcb, 0);
2446

    
2447
    return 0;
2448

    
2449
fail:
2450
    for (i = 0; i < mcb->num_callbacks; i++) {
2451
        reqs[i].error = -EIO;
2452
    }
2453
    qemu_free(mcb);
2454
    return -1;
2455
}
2456

    
2457
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2458
        BlockDriverCompletionFunc *cb, void *opaque)
2459
{
2460
    BlockDriver *drv = bs->drv;
2461

    
2462
    trace_bdrv_aio_flush(bs, opaque);
2463

    
2464
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2465
        return bdrv_aio_noop_em(bs, cb, opaque);
2466
    }
2467

    
2468
    if (!drv)
2469
        return NULL;
2470
    return drv->bdrv_aio_flush(bs, cb, opaque);
2471
}
2472

    
2473
/* Cancel @acb through the cancel hook of the pool it was taken from. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    AIOPool *pool = acb->pool;

    pool->cancel(acb);
}
2477

    
2478

    
2479
/**************************************************************/
2480
/* async block device emulation */
2481

    
2482
typedef struct BlockDriverAIOCBSync {
2483
    BlockDriverAIOCB common;
2484
    QEMUBH *bh;
2485
    int ret;
2486
    /* vector translation state */
2487
    QEMUIOVector *qiov;
2488
    uint8_t *bounce;
2489
    int is_write;
2490
} BlockDriverAIOCBSync;
2491

    
2492
/*
 * Cancel hook of bdrv_em_aio_pool: delete the pending bottom half and
 * return the AIOCB to its pool.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *sync_acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);

    qemu_bh_delete(sync_acb->bh);
    sync_acb->bh = NULL;
    qemu_aio_release(sync_acb);
}
2500

    
2501
static AIOPool bdrv_em_aio_pool = {
2502
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2503
    .cancel             = bdrv_aio_cancel_em,
2504
};
2505

    
2506
static void bdrv_aio_bh_cb(void *opaque)
2507
{
2508
    BlockDriverAIOCBSync *acb = opaque;
2509

    
2510
    if (!acb->is_write)
2511
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2512
    qemu_vfree(acb->bounce);
2513
    acb->common.cb(acb->common.opaque, acb->ret);
2514
    qemu_bh_delete(acb->bh);
2515
    acb->bh = NULL;
2516
    qemu_aio_release(acb);
2517
}
2518

    
2519
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2520
                                            int64_t sector_num,
2521
                                            QEMUIOVector *qiov,
2522
                                            int nb_sectors,
2523
                                            BlockDriverCompletionFunc *cb,
2524
                                            void *opaque,
2525
                                            int is_write)
2526

    
2527
{
2528
    BlockDriverAIOCBSync *acb;
2529

    
2530
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2531
    acb->is_write = is_write;
2532
    acb->qiov = qiov;
2533
    acb->bounce = qemu_blockalign(bs, qiov->size);
2534

    
2535
    if (!acb->bh)
2536
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2537

    
2538
    if (is_write) {
2539
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2540
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2541
    } else {
2542
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2543
    }
2544

    
2545
    qemu_bh_schedule(acb->bh);
2546

    
2547
    return &acb->common;
2548
}
2549

    
2550
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2551
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2552
        BlockDriverCompletionFunc *cb, void *opaque)
2553
{
2554
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2555
}
2556

    
2557
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2558
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2559
        BlockDriverCompletionFunc *cb, void *opaque)
2560
{
2561
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2562
}
2563

    
2564
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2565
        BlockDriverCompletionFunc *cb, void *opaque)
2566
{
2567
    BlockDriverAIOCBSync *acb;
2568

    
2569
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2570
    acb->is_write = 1; /* don't bounce in the completion hadler */
2571
    acb->qiov = NULL;
2572
    acb->bounce = NULL;
2573
    acb->ret = 0;
2574

    
2575
    if (!acb->bh)
2576
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2577

    
2578
    bdrv_flush(bs);
2579
    qemu_bh_schedule(acb->bh);
2580
    return &acb->common;
2581
}
2582

    
2583
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2584
        BlockDriverCompletionFunc *cb, void *opaque)
2585
{
2586
    BlockDriverAIOCBSync *acb;
2587

    
2588
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2589
    acb->is_write = 1; /* don't bounce in the completion handler */
2590
    acb->qiov = NULL;
2591
    acb->bounce = NULL;
2592
    acb->ret = 0;
2593

    
2594
    if (!acb->bh) {
2595
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2596
    }
2597

    
2598
    qemu_bh_schedule(acb->bh);
2599
    return &acb->common;
2600
}
2601

    
2602
/**************************************************************/
2603
/* sync block device emulation */
2604

    
2605
/* Completion callback storing @ret into the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2609

    
2610
#define NOT_DONE 0x7fffffff
2611

    
2612
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2613
                        uint8_t *buf, int nb_sectors)
2614
{
2615
    int async_ret;
2616
    BlockDriverAIOCB *acb;
2617
    struct iovec iov;
2618
    QEMUIOVector qiov;
2619

    
2620
    async_context_push();
2621

    
2622
    async_ret = NOT_DONE;
2623
    iov.iov_base = (void *)buf;
2624
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2625
    qemu_iovec_init_external(&qiov, &iov, 1);
2626
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2627
        bdrv_rw_em_cb, &async_ret);
2628
    if (acb == NULL) {
2629
        async_ret = -1;
2630
        goto fail;
2631
    }
2632

    
2633
    while (async_ret == NOT_DONE) {
2634
        qemu_aio_wait();
2635
    }
2636

    
2637

    
2638
fail:
2639
    async_context_pop();
2640
    return async_ret;
2641
}
2642

    
2643
/*
 * Emulate a synchronous write on top of bdrv_aio_writev().
 *
 * The request is issued inside its own async context and qemu_aio_wait()
 * is called until the completion callback has stored a result.  Returns
 * that result, or -1 when the request could not be submitted.
 */
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int async_ret = NOT_DONE;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    async_context_push();

    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        async_ret = -1;
    } else {
        while (async_ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    async_context_pop();
    return async_ret;
}
2671

    
2672
void bdrv_init(void)
2673
{
2674
    module_call_init(MODULE_INIT_BLOCK);
2675
}
2676

    
2677
void bdrv_init_with_whitelist(void)
2678
{
2679
    use_bdrv_whitelist = 1;
2680
    bdrv_init();
2681
}
2682

    
2683
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2684
                   BlockDriverCompletionFunc *cb, void *opaque)
2685
{
2686
    BlockDriverAIOCB *acb;
2687

    
2688
    if (pool->free_aiocb) {
2689
        acb = pool->free_aiocb;
2690
        pool->free_aiocb = acb->next;
2691
    } else {
2692
        acb = qemu_mallocz(pool->aiocb_size);
2693
        acb->pool = pool;
2694
    }
2695
    acb->bs = bs;
2696
    acb->cb = cb;
2697
    acb->opaque = opaque;
2698
    return acb;
2699
}
2700

    
2701
void qemu_aio_release(void *p)
2702
{
2703
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2704
    AIOPool *pool = acb->pool;
2705
    acb->next = pool->free_aiocb;
2706
    pool->free_aiocb = acb;
2707
}
2708

    
2709
/**************************************************************/
2710
/* removable device support */
2711

    
2712
/**
2713
 * Return TRUE if the media is present
2714
 */
2715
int bdrv_is_inserted(BlockDriverState *bs)
2716
{
2717
    BlockDriver *drv = bs->drv;
2718
    int ret;
2719
    if (!drv)
2720
        return 0;
2721
    if (!drv->bdrv_is_inserted)
2722
        return !bs->tray_open;
2723
    ret = drv->bdrv_is_inserted(bs);
2724
    return ret;
2725
}
2726

    
2727
/**
2728
 * Return TRUE if the media changed since the last call to this
2729
 * function. It is currently only used for floppy disks
2730
 */
2731
int bdrv_media_changed(BlockDriverState *bs)
2732
{
2733
    BlockDriver *drv = bs->drv;
2734
    int ret;
2735

    
2736
    if (!drv || !drv->bdrv_media_changed)
2737
        ret = -ENOTSUP;
2738
    else
2739
        ret = drv->bdrv_media_changed(bs);
2740
    if (ret == -ENOTSUP)
2741
        ret = bs->media_changed;
2742
    bs->media_changed = 0;
2743
    return ret;
2744
}
2745

    
2746
/**
2747
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2748
 */
2749
int bdrv_eject(BlockDriverState *bs, int eject_flag)
2750
{
2751
    BlockDriver *drv = bs->drv;
2752
    int ret;
2753

    
2754
    if (bs->locked) {
2755
        return -EBUSY;
2756
    }
2757

    
2758
    if (!drv || !drv->bdrv_eject) {
2759
        ret = -ENOTSUP;
2760
    } else {
2761
        ret = drv->bdrv_eject(bs, eject_flag);
2762
    }
2763
    if (ret == -ENOTSUP) {
2764
        ret = 0;
2765
    }
2766
    if (ret >= 0) {
2767
        bs->tray_open = eject_flag;
2768
    }
2769

    
2770
    return ret;
2771
}
2772

    
2773
/* Return the locked flag of @bs as last set by bdrv_set_locked(). */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}
2777

    
2778
/**
2779
 * Lock or unlock the media (if it is locked, the user won't be able
2780
 * to eject it manually).
2781
 */
2782
void bdrv_set_locked(BlockDriverState *bs, int locked)
2783
{
2784
    BlockDriver *drv = bs->drv;
2785

    
2786
    trace_bdrv_set_locked(bs, locked);
2787

    
2788
    bs->locked = locked;
2789
    if (drv && drv->bdrv_set_locked) {
2790
        drv->bdrv_set_locked(bs, locked);
2791
    }
2792
}
2793

    
2794
/* needed for generic scsi interface */
2795

    
2796
/*
 * Pass ioctl request @req with argument @buf to the driver's bdrv_ioctl
 * hook.  Returns -ENOTSUP when there is no driver or no such hook.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_ioctl) {
        return -ENOTSUP;
    }
    return drv->bdrv_ioctl(bs, req, buf);
}
2804

    
2805
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2806
        unsigned long int req, void *buf,
2807
        BlockDriverCompletionFunc *cb, void *opaque)
2808
{
2809
    BlockDriver *drv = bs->drv;
2810

    
2811
    if (drv && drv->bdrv_aio_ioctl)
2812
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2813
    return NULL;
2814
}
2815

    
2816

    
2817

    
2818
/*
 * Allocate @size bytes with qemu_memalign(), aligned to
 * bs->buffer_alignment, or to 512 when @bs is NULL or its alignment is 0.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    size_t alignment = 512;

    if (bs && bs->buffer_alignment) {
        alignment = bs->buffer_alignment;
    }
    return qemu_memalign(alignment, size);
}
2822

    
2823
/*
 * Enable or disable dirty tracking on @bs.
 *
 * The dirty counter is reset in both cases.  Enabling allocates a zeroed
 * bitmap with one bit per chunk of BDRV_SECTORS_PER_DIRTY_CHUNK sectors,
 * unless a bitmap already exists; disabling frees the bitmap.
 *
 * The bitmap is read in units of unsigned long (see bdrv_get_dirty()), so
 * its size is rounded up to a whole number of such words rather than to
 * whole bytes; otherwise the last word could reach past the allocation.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    /* Number of sectors covered by one unsigned long of the bitmap. */
    const int64_t sectors_per_word =
        (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK *
        (int64_t)(sizeof(unsigned long) * 8);
    int64_t bitmap_words;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_words = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    sectors_per_word - 1;
            bitmap_words /= sectors_per_word;

            bs->dirty_bitmap =
                qemu_mallocz(bitmap_words * sizeof(unsigned long));
        }
    } else {
        if (bs->dirty_bitmap) {
            qemu_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
2843

    
2844
/*
 * Return 1 if the chunk containing @sector is marked dirty, 0 otherwise.
 * The answer is 0 when dirty tracking is off or when @sector lies at or
 * beyond the length reported by bdrv_getlength().
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (!bs->dirty_bitmap ||
        (sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    return (bs->dirty_bitmap[chunk / bits_per_word] &
            (1UL << (chunk % bits_per_word))) != 0;
}
2856

    
2857
/*
 * Clear the dirty state of @nr_sectors sectors starting at @cur_sector by
 * calling set_dirty_bitmap() with a value of 0.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    const int dirty = 0;

    set_dirty_bitmap(bs, cur_sector, nr_sectors, dirty);
}
2862

    
2863
/* Return the dirty_count field of @bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
2867

    
2868
/*
 * Set the in_use flag of @bs to @in_use.  The assertion requires the new
 * value to differ from the current one, so setting the same state twice
 * in a row is treated as a programming error.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);

    bs->in_use = in_use;
}
2873

    
2874
/* Return the in_use flag of @bs as last set by bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
2878

    
2879
int bdrv_img_create(const char *filename, const char *fmt,
2880
                    const char *base_filename, const char *base_fmt,
2881
                    char *options, uint64_t img_size, int flags)
2882
{
2883
    QEMUOptionParameter *param = NULL, *create_options = NULL;
2884
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
2885
    BlockDriverState *bs = NULL;
2886
    BlockDriver *drv, *proto_drv;
2887
    BlockDriver *backing_drv = NULL;
2888
    int ret = 0;
2889

    
2890
    /* Find driver and parse its options */
2891
    drv = bdrv_find_format(fmt);
2892
    if (!drv) {
2893
        error_report("Unknown file format '%s'", fmt);
2894
        ret = -EINVAL;
2895
        goto out;
2896
    }
2897

    
2898
    proto_drv = bdrv_find_protocol(filename);
2899
    if (!proto_drv) {
2900
        error_report("Unknown protocol '%s'", filename);
2901
        ret = -EINVAL;
2902
        goto out;
2903
    }
2904

    
2905
    create_options = append_option_parameters(create_options,
2906
                                              drv->create_options);
2907
    create_options = append_option_parameters(create_options,
2908
                                              proto_drv->create_options);
2909

    
2910
    /* Create parameter list with default values */
2911
    param = parse_option_parameters("", create_options, param);
2912

    
2913
    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
2914

    
2915
    /* Parse -o options */
2916
    if (options) {
2917
        param = parse_option_parameters(options, create_options, param);
2918
        if (param == NULL) {
2919
            error_report("Invalid options for file format '%s'.", fmt);
2920
            ret = -EINVAL;
2921
            goto out;
2922
        }
2923
    }
2924

    
2925
    if (base_filename) {
2926
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
2927
                                 base_filename)) {
2928
            error_report("Backing file not supported for file format '%s'",
2929
                         fmt);
2930
            ret = -EINVAL;
2931
            goto out;
2932
        }
2933
    }
2934

    
2935
    if (base_fmt) {
2936
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
2937
            error_report("Backing file format not supported for file "
2938
                         "format '%s'", fmt);
2939
            ret = -EINVAL;
2940
            goto out;
2941
        }
2942
    }
2943

    
2944
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
2945
    if (backing_file && backing_file->value.s) {
2946
        if (!strcmp(filename, backing_file->value.s)) {
2947
            error_report("Error: Trying to create an image with the "
2948
                         "same filename as the backing file");
2949
            ret = -EINVAL;
2950
            goto out;
2951
        }
2952
    }
2953

    
2954
    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
2955
    if (backing_fmt && backing_fmt->value.s) {
2956
        backing_drv = bdrv_find_format(backing_fmt->value.s);
2957
        if (!backing_drv) {
2958
            error_report("Unknown backing file format '%s'",
2959
                         backing_fmt->value.s);
2960
            ret = -EINVAL;
2961
            goto out;
2962
        }
2963
    }
2964

    
2965
    // The size for the image must always be specified, with one exception:
2966
    // If we are using a backing file, we can obtain the size from there
2967
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
2968
    if (size && size->value.n == -1) {
2969
        if (backing_file && backing_file->value.s) {
2970
            uint64_t size;
2971
            char buf[32];
2972

    
2973
            bs = bdrv_new("");
2974

    
2975
            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
2976
            if (ret < 0) {
2977
                error_report("Could not open '%s'", backing_file->value.s);
2978
                goto out;
2979
            }
2980
            bdrv_get_geometry(bs, &size);
2981
            size *= 512;
2982

    
2983
            snprintf(buf, sizeof(buf), "%" PRId64, size);
2984
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
2985
        } else {
2986
            error_report("Image creation needs a size parameter");
2987
            ret = -EINVAL;
2988
            goto out;
2989
        }
2990
    }
2991

    
2992
    printf("Formatting '%s', fmt=%s ", filename, fmt);
2993
    print_option_parameters(param);
2994
    puts("");
2995

    
2996
    ret = bdrv_create(drv, filename, param);
2997

    
2998
    if (ret < 0) {
2999
        if (ret == -ENOTSUP) {
3000
            error_report("Formatting or formatting option not supported for "
3001
                         "file format '%s'", fmt);
3002
        } else if (ret == -EFBIG) {
3003
            error_report("The image size is too large for file format '%s'",
3004
                         fmt);
3005
        } else {
3006
            error_report("%s: error while creating %s: %s", filename, fmt,
3007
                         strerror(-ret));
3008
        }
3009
    }
3010

    
3011
out:
3012
    free_option_parameters(create_options);
3013
    free_option_parameters(param);
3014

    
3015
    if (bs) {
3016
        bdrv_delete(bs);
3017
    }
3018

    
3019
    return ret;
3020
}