Statistics
| Branch: | Revision:

root / block.c @ 7267c094

History | View | Annotate | Download (88.4 kB)

1
/*
2
 * QEMU System Emulator block driver
3
 *
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "config-host.h"
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "qemu-objects.h"
31
#include "qemu-coroutine.h"
32

    
33
#ifdef CONFIG_BSD
34
#include <sys/types.h>
35
#include <sys/stat.h>
36
#include <sys/ioctl.h>
37
#include <sys/queue.h>
38
#ifndef __DragonFly__
39
#include <sys/disk.h>
40
#endif
41
#endif
42

    
43
#ifdef _WIN32
44
#include <windows.h>
45
#endif
46

    
47
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
48
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
49
        BlockDriverCompletionFunc *cb, void *opaque);
50
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
51
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52
        BlockDriverCompletionFunc *cb, void *opaque);
53
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
54
        BlockDriverCompletionFunc *cb, void *opaque);
55
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
56
        BlockDriverCompletionFunc *cb, void *opaque);
57
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
58
                        uint8_t *buf, int nb_sectors);
59
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
60
                         const uint8_t *buf, int nb_sectors);
61
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
62
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63
        BlockDriverCompletionFunc *cb, void *opaque);
64
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
65
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66
        BlockDriverCompletionFunc *cb, void *opaque);
67
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68
                                         int64_t sector_num, int nb_sectors,
69
                                         QEMUIOVector *iov);
70
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71
                                         int64_t sector_num, int nb_sectors,
72
                                         QEMUIOVector *iov);
73
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
74

    
75
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
77

    
78
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
80

    
81
/* The device to use for VM snapshots */
82
static BlockDriverState *bs_snapshots;
83

    
84
/* If non-zero, use only whitelisted block drivers */
85
static int use_bdrv_whitelist;
86

    
87
#ifdef _WIN32
88
/*
 * Return 1 if filename begins with an ASCII letter followed by ':'
 * (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a letter */
    return (is_lower || is_upper) && filename[1] == ':';
}
94

    
95
/*
 * Return 1 if filename is exactly a drive specifier such as "d:", or if it
 * starts with one of the prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* a bare "<letter>:" with nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
105
#endif
106

    
107
/*
 * Check if the path starts with "<protocol>:".
 *
 * On Windows, drive names and drive-letter prefixes are not treated as
 * protocols. Otherwise any path containing a ':' is reported as having one.
 * Returns 1 if a protocol is present, 0 if not.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path)) {
        return 0;
    }
    if (is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
119

    
120
/*
 * Return non-zero if path is absolute.
 *
 * The part of the path that follows the first ':' (or the whole path when
 * there is no ':') must start with a directory separator. On Windows a
 * backslash counts as a separator too, and a path that itself starts with
 * a separator is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
139

    
140
/*
 * If filename is absolute, just copy it to dest. Otherwise, build a path
 * to it by considering it relative to base_path. URLs are supported.
 *
 * The directory part of base_path is everything up to and including the
 * later of: the first ':' and the last directory separator. The result is
 * truncated to fit in dest_size bytes. Nothing is written when
 * dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever position comes later */
    dir_end = (after_sep > after_proto) ? after_sep : after_proto;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183

    
184
/*
 * Register a block driver.
 *
 * Entry points the driver does not implement are filled in with the
 * emulation helpers of this file, then the driver is added at the head of
 * the bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    if (!bdrv->bdrv_co_readv) {
        /* no coroutine interface: emulate it */
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        } else if (!bdrv->bdrv_read) {
            /* add synchronous IO emulation layer */
            bdrv->bdrv_read = bdrv_read_em;
            bdrv->bdrv_write = bdrv_write_em;
        }
    } else {
        /* Emulate AIO by coroutines, and sync by AIO */
        bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
        bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
        bdrv->bdrv_read = bdrv_read_em;
        bdrv->bdrv_write = bdrv_write_em;
    }

    if (!bdrv->bdrv_aio_flush) {
        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
212

    
213
/* create a new block device (by default it is empty) */
214
BlockDriverState *bdrv_new(const char *device_name)
215
{
216
    BlockDriverState *bs;
217

    
218
    bs = g_malloc0(sizeof(BlockDriverState));
219
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
220
    if (device_name[0] != '\0') {
221
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
222
    }
223
    return bs;
224
}
225

    
226
BlockDriver *bdrv_find_format(const char *format_name)
227
{
228
    BlockDriver *drv1;
229
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230
        if (!strcmp(drv1->format_name, format_name)) {
231
            return drv1;
232
        }
233
    }
234
    return NULL;
235
}
236

    
237
/*
 * Return 1 if drv's format name appears in the build-time whitelist
 * (CONFIG_BDRV_WHITELIST), or if that whitelist is empty; 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    int i;

    if (whitelist[0] == NULL) {
        /* no whitelist, anything goes */
        return 1;
    }

    for (i = 0; whitelist[i] != NULL; i++) {
        if (strcmp(drv->format_name, whitelist[i]) == 0) {
            return 1;
        }
    }

    return 0;
}
254

    
255
/*
 * Like bdrv_find_format(), but return NULL when the driver exists and is
 * not whitelisted.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv == NULL || !bdrv_is_whitelisted(drv)) {
        return NULL;
    }
    return drv;
}
260

    
261
int bdrv_create(BlockDriver *drv, const char* filename,
262
    QEMUOptionParameter *options)
263
{
264
    if (!drv->bdrv_create)
265
        return -ENOTSUP;
266

    
267
    return drv->bdrv_create(filename, options);
268
}
269

    
270
/*
 * Create a file using the protocol driver that bdrv_find_protocol() selects
 * for filename. Returns -ENOENT when no protocol driver is found, otherwise
 * the result of bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *proto_drv = bdrv_find_protocol(filename);

    if (!proto_drv) {
        return -ENOENT;
    }
    return bdrv_create(proto_drv, filename, options);
}
281

    
282
#ifdef _WIN32
/*
 * Create a temporary file and store its name in filename (at most size
 * bytes, NUL-terminated).
 *
 * GetTempFileName() always needs a MAX_PATH sized output buffer, so the
 * name is generated in a local buffer and then copied with truncation
 * instead of being written straight into the caller's buffer. If name
 * generation fails, filename is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH] = "";
    char temp_file[MAX_PATH] = "";

    if (size <= 0) {
        return;
    }

    GetTempPath(MAX_PATH, temp_dir);
    if (!GetTempFileName(temp_dir, "qem", 0, temp_file)) {
        temp_file[0] = '\0';
    }
    pstrcpy(filename, size, temp_file);
}
#else
/*
 * Create a temporary file under $TMPDIR (or /tmp if unset) and store its
 * name in filename (at most size bytes).
 *
 * The file is created by mkstemp() and closed again; only the name is
 * handed back. If the file cannot be created (including when the template
 * does not fit in size bytes), filename is set to the empty string so that
 * callers do not go on to use a non-unique template path.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
304

    
305
/*
306
 * Detect host devices. By convention, /dev/cdrom[N] is always
307
 * recognized as a host CDROM.
308
 */
309
static BlockDriver *find_hdev_driver(const char *filename)
310
{
311
    int score_max = 0, score;
312
    BlockDriver *drv = NULL, *d;
313

    
314
    QLIST_FOREACH(d, &bdrv_drivers, list) {
315
        if (d->bdrv_probe_device) {
316
            score = d->bdrv_probe_device(filename);
317
            if (score > score_max) {
318
                score_max = score;
319
                drv = d;
320
            }
321
        }
322
    }
323

    
324
    return drv;
325
}
326

    
327
BlockDriver *bdrv_find_protocol(const char *filename)
328
{
329
    BlockDriver *drv1;
330
    char protocol[128];
331
    int len;
332
    const char *p;
333

    
334
    /* TODO Drivers without bdrv_file_open must be specified explicitly */
335

    
336
    /*
337
     * XXX(hch): we really should not let host device detection
338
     * override an explicit protocol specification, but moving this
339
     * later breaks access to device names with colons in them.
340
     * Thanks to the brain-dead persistent naming schemes on udev-
341
     * based Linux systems those actually are quite common.
342
     */
343
    drv1 = find_hdev_driver(filename);
344
    if (drv1) {
345
        return drv1;
346
    }
347

    
348
    if (!path_has_protocol(filename)) {
349
        return bdrv_find_format("file");
350
    }
351
    p = strchr(filename, ':');
352
    assert(p != NULL);
353
    len = p - filename;
354
    if (len > sizeof(protocol) - 1)
355
        len = sizeof(protocol) - 1;
356
    memcpy(protocol, filename, len);
357
    protocol[len] = '\0';
358
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359
        if (drv1->protocol_name &&
360
            !strcmp(drv1->protocol_name, protocol)) {
361
            return drv1;
362
        }
363
    }
364
    return NULL;
365
}
366

    
367
/*
 * Probe the format of the image called filename.
 *
 * The file is opened through its protocol driver and its first 2048 bytes
 * are handed to the bdrv_probe callback of every registered driver; the
 * highest scoring driver (score above zero) is stored in *pdrv.
 * scsi-generic devices and drives without a medium get the "raw" driver
 * without any probing.
 *
 * Returns a negative errno value on failure (with *pdrv set to NULL) and a
 * non-negative value on success.
 */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    BlockDriverState *bs;
    BlockDriver *cur, *best;
    uint8_t header[2048];
    int best_score;
    int ret;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        best = bdrv_find_format("raw");
        if (best == NULL) {
            ret = -ENOENT;
        }
        *pdrv = best;
        return ret;
    }

    ret = bdrv_pread(bs, 0, header, sizeof(header));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* ret now holds the value bdrv_pread() returned; pass it to the probes */
    best = NULL;
    best_score = 0;
    QLIST_FOREACH(cur, &bdrv_drivers, list) {
        int score;

        if (!cur->bdrv_probe) {
            continue;
        }
        score = cur->bdrv_probe(header, ret, filename);
        if (score > best_score) {
            best_score = score;
            best = cur;
        }
    }

    if (best == NULL) {
        ret = -ENOENT;
    }
    *pdrv = best;
    return ret;
}
415

    
416
/**
417
 * Set the current 'total_sectors' value
418
 */
419
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420
{
421
    BlockDriver *drv = bs->drv;
422

    
423
    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424
    if (bs->sg)
425
        return 0;
426

    
427
    /* query actual device if possible, otherwise just trust the hint */
428
    if (drv->bdrv_getlength) {
429
        int64_t length = drv->bdrv_getlength(bs);
430
        if (length < 0) {
431
            return length;
432
        }
433
        hint = length >> BDRV_SECTOR_BITS;
434
    }
435

    
436
    bs->total_sectors = hint;
437
    return 0;
438
}
439

    
440
/*
441
 * Common part for opening disk images and files
442
 */
443
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
444
    int flags, BlockDriver *drv)
445
{
446
    int ret, open_flags;
447

    
448
    assert(drv != NULL);
449

    
450
    bs->file = NULL;
451
    bs->total_sectors = 0;
452
    bs->encrypted = 0;
453
    bs->valid_key = 0;
454
    bs->open_flags = flags;
455
    /* buffer_alignment defaulted to 512, drivers can change this value */
456
    bs->buffer_alignment = 512;
457

    
458
    pstrcpy(bs->filename, sizeof(bs->filename), filename);
459

    
460
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
461
        return -ENOTSUP;
462
    }
463

    
464
    bs->drv = drv;
465
    bs->opaque = g_malloc0(drv->instance_size);
466

    
467
    if (flags & BDRV_O_CACHE_WB)
468
        bs->enable_write_cache = 1;
469

    
470
    /*
471
     * Clear flags that are internal to the block layer before opening the
472
     * image.
473
     */
474
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
475

    
476
    /*
477
     * Snapshots should be writable.
478
     */
479
    if (bs->is_temporary) {
480
        open_flags |= BDRV_O_RDWR;
481
    }
482

    
483
    /* Open the image, either directly or using a protocol */
484
    if (drv->bdrv_file_open) {
485
        ret = drv->bdrv_file_open(bs, filename, open_flags);
486
    } else {
487
        ret = bdrv_file_open(&bs->file, filename, open_flags);
488
        if (ret >= 0) {
489
            ret = drv->bdrv_open(bs, open_flags);
490
        }
491
    }
492

    
493
    if (ret < 0) {
494
        goto free_and_fail;
495
    }
496

    
497
    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
498

    
499
    ret = refresh_total_sectors(bs, bs->total_sectors);
500
    if (ret < 0) {
501
        goto free_and_fail;
502
    }
503

    
504
#ifndef _WIN32
505
    if (bs->is_temporary) {
506
        unlink(filename);
507
    }
508
#endif
509
    return 0;
510

    
511
free_and_fail:
512
    if (bs->file) {
513
        bdrv_delete(bs->file);
514
        bs->file = NULL;
515
    }
516
    g_free(bs->opaque);
517
    bs->opaque = NULL;
518
    bs->drv = NULL;
519
    return ret;
520
}
521

    
522
/*
523
 * Opens a file using a protocol (file, host_device, nbd, ...)
524
 */
525
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
526
{
527
    BlockDriverState *bs;
528
    BlockDriver *drv;
529
    int ret;
530

    
531
    drv = bdrv_find_protocol(filename);
532
    if (!drv) {
533
        return -ENOENT;
534
    }
535

    
536
    bs = bdrv_new("");
537
    ret = bdrv_open_common(bs, filename, flags, drv);
538
    if (ret < 0) {
539
        bdrv_delete(bs);
540
        return ret;
541
    }
542
    bs->growable = 1;
543
    *pbs = bs;
544
    return 0;
545
}
546

    
547
/*
548
 * Opens a disk image (raw, qcow2, vmdk, ...)
549
 */
550
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
551
              BlockDriver *drv)
552
{
553
    int ret;
554

    
555
    if (flags & BDRV_O_SNAPSHOT) {
556
        BlockDriverState *bs1;
557
        int64_t total_size;
558
        int is_protocol = 0;
559
        BlockDriver *bdrv_qcow2;
560
        QEMUOptionParameter *options;
561
        char tmp_filename[PATH_MAX];
562
        char backing_filename[PATH_MAX];
563

    
564
        /* if snapshot, we create a temporary backing file and open it
565
           instead of opening 'filename' directly */
566

    
567
        /* if there is a backing file, use it */
568
        bs1 = bdrv_new("");
569
        ret = bdrv_open(bs1, filename, 0, drv);
570
        if (ret < 0) {
571
            bdrv_delete(bs1);
572
            return ret;
573
        }
574
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
575

    
576
        if (bs1->drv && bs1->drv->protocol_name)
577
            is_protocol = 1;
578

    
579
        bdrv_delete(bs1);
580

    
581
        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
582

    
583
        /* Real path is meaningless for protocols */
584
        if (is_protocol)
585
            snprintf(backing_filename, sizeof(backing_filename),
586
                     "%s", filename);
587
        else if (!realpath(filename, backing_filename))
588
            return -errno;
589

    
590
        bdrv_qcow2 = bdrv_find_format("qcow2");
591
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
592

    
593
        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
594
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
595
        if (drv) {
596
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
597
                drv->format_name);
598
        }
599

    
600
        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
601
        free_option_parameters(options);
602
        if (ret < 0) {
603
            return ret;
604
        }
605

    
606
        filename = tmp_filename;
607
        drv = bdrv_qcow2;
608
        bs->is_temporary = 1;
609
    }
610

    
611
    /* Find the right image format driver */
612
    if (!drv) {
613
        ret = find_image_format(filename, &drv);
614
    }
615

    
616
    if (!drv) {
617
        goto unlink_and_fail;
618
    }
619

    
620
    /* Open the image */
621
    ret = bdrv_open_common(bs, filename, flags, drv);
622
    if (ret < 0) {
623
        goto unlink_and_fail;
624
    }
625

    
626
    /* If there is a backing file, use it */
627
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
628
        char backing_filename[PATH_MAX];
629
        int back_flags;
630
        BlockDriver *back_drv = NULL;
631

    
632
        bs->backing_hd = bdrv_new("");
633

    
634
        if (path_has_protocol(bs->backing_file)) {
635
            pstrcpy(backing_filename, sizeof(backing_filename),
636
                    bs->backing_file);
637
        } else {
638
            path_combine(backing_filename, sizeof(backing_filename),
639
                         filename, bs->backing_file);
640
        }
641

    
642
        if (bs->backing_format[0] != '\0') {
643
            back_drv = bdrv_find_format(bs->backing_format);
644
        }
645

    
646
        /* backing files always opened read-only */
647
        back_flags =
648
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
649

    
650
        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
651
        if (ret < 0) {
652
            bdrv_close(bs);
653
            return ret;
654
        }
655
        if (bs->is_temporary) {
656
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
657
        } else {
658
            /* base image inherits from "parent" */
659
            bs->backing_hd->keep_read_only = bs->keep_read_only;
660
        }
661
    }
662

    
663
    if (!bdrv_key_required(bs)) {
664
        /* call the change callback */
665
        bs->media_changed = 1;
666
        if (bs->change_cb)
667
            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
668
    }
669

    
670
    return 0;
671

    
672
unlink_and_fail:
673
    if (bs->is_temporary) {
674
        unlink(filename);
675
    }
676
    return ret;
677
}
678

    
679
/*
 * Close the image attached to bs. Does nothing if bs has no driver.
 *
 * The backing device is deleted, the driver's close callback is run, the
 * driver state is freed, the protocol layer in bs->file (if any) is
 * closed, and finally the change callback is invoked with CHANGE_MEDIA.
 */
void bdrv_close(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }

    if (bs->backing_hd != NULL) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
    }

    bs->drv->bdrv_close(bs);
    g_free(bs->opaque);
#ifdef _WIN32
    if (bs->is_temporary) {
        unlink(bs->filename);
    }
#endif
    bs->opaque = NULL;
    bs->drv = NULL;

    if (bs->file) {
        bdrv_close(bs->file);
    }

    /* call the change callback */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
}
709

    
710
void bdrv_close_all(void)
711
{
712
    BlockDriverState *bs;
713

    
714
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
715
        bdrv_close(bs);
716
    }
717
}
718

    
719
/* make a BlockDriverState anonymous by removing from bdrv_state list.
720
   Also, NULL terminate the device_name to prevent double remove */
721
void bdrv_make_anon(BlockDriverState *bs)
722
{
723
    if (bs->device_name[0] != '\0') {
724
        QTAILQ_REMOVE(&bdrv_states, bs, list);
725
    }
726
    bs->device_name[0] = '\0';
727
}
728

    
729
/*
 * Close and free bs, including the protocol layer in bs->file.
 * bs must not have a device attached and must not be the snapshot device
 * once it is closed.
 */
void bdrv_delete(BlockDriverState *bs)
{
    assert(bs->peer == NULL);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file) {
        bdrv_delete(bs->file);
    }

    assert(bs_snapshots != bs);
    g_free(bs);
}
744

    
745
/*
 * Attach qdev to bs as its peer device.
 * Returns 0 on success, -EBUSY if bs already has a peer.
 */
int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
{
    if (!bs->peer) {
        bs->peer = qdev;
        return 0;
    }

    return -EBUSY;
}
753

    
754
/*
 * Detach qdev from bs and drop the change callback together with its
 * opaque pointer. qdev must be the currently attached peer.
 */
void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
{
    assert(qdev == bs->peer);

    bs->change_cb = NULL;
    bs->change_opaque = NULL;
    bs->peer = NULL;
}
761

    
762
/* Return the device attached to bs, or NULL if there is none. */
DeviceState *bdrv_get_attached(BlockDriverState *bs)
{
    DeviceState *peer = bs->peer;

    return peer;
}
766

    
767
/*
768
 * Run consistency checks on an image
769
 *
770
 * Returns 0 if the check could be completed (it doesn't mean that the image is
771
 * free of errors) or -errno when an internal error occurred. The results of the
772
 * check are stored in res.
773
 */
774
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
775
{
776
    if (bs->drv->bdrv_check == NULL) {
777
        return -ENOTSUP;
778
    }
779

    
780
    memset(res, 0, sizeof(*res));
781
    return bs->drv->bdrv_check(bs, res);
782
}
783

    
784
#define COMMIT_BUF_SECTORS 2048
785

    
786
/* commit COW file into the raw image */
787
int bdrv_commit(BlockDriverState *bs)
788
{
789
    BlockDriver *drv = bs->drv;
790
    BlockDriver *backing_drv;
791
    int64_t sector, total_sectors;
792
    int n, ro, open_flags;
793
    int ret = 0, rw_ret = 0;
794
    uint8_t *buf;
795
    char filename[1024];
796
    BlockDriverState *bs_rw, *bs_ro;
797

    
798
    if (!drv)
799
        return -ENOMEDIUM;
800
    
801
    if (!bs->backing_hd) {
802
        return -ENOTSUP;
803
    }
804

    
805
    if (bs->backing_hd->keep_read_only) {
806
        return -EACCES;
807
    }
808

    
809
    backing_drv = bs->backing_hd->drv;
810
    ro = bs->backing_hd->read_only;
811
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
812
    open_flags =  bs->backing_hd->open_flags;
813

    
814
    if (ro) {
815
        /* re-open as RW */
816
        bdrv_delete(bs->backing_hd);
817
        bs->backing_hd = NULL;
818
        bs_rw = bdrv_new("");
819
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
820
            backing_drv);
821
        if (rw_ret < 0) {
822
            bdrv_delete(bs_rw);
823
            /* try to re-open read-only */
824
            bs_ro = bdrv_new("");
825
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
826
                backing_drv);
827
            if (ret < 0) {
828
                bdrv_delete(bs_ro);
829
                /* drive not functional anymore */
830
                bs->drv = NULL;
831
                return ret;
832
            }
833
            bs->backing_hd = bs_ro;
834
            return rw_ret;
835
        }
836
        bs->backing_hd = bs_rw;
837
    }
838

    
839
    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
840
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
841

    
842
    for (sector = 0; sector < total_sectors; sector += n) {
843
        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
844

    
845
            if (bdrv_read(bs, sector, buf, n) != 0) {
846
                ret = -EIO;
847
                goto ro_cleanup;
848
            }
849

    
850
            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
851
                ret = -EIO;
852
                goto ro_cleanup;
853
            }
854
        }
855
    }
856

    
857
    if (drv->bdrv_make_empty) {
858
        ret = drv->bdrv_make_empty(bs);
859
        bdrv_flush(bs);
860
    }
861

    
862
    /*
863
     * Make sure all data we wrote to the backing device is actually
864
     * stable on disk.
865
     */
866
    if (bs->backing_hd)
867
        bdrv_flush(bs->backing_hd);
868

    
869
ro_cleanup:
870
    g_free(buf);
871

    
872
    if (ro) {
873
        /* re-open as RO */
874
        bdrv_delete(bs->backing_hd);
875
        bs->backing_hd = NULL;
876
        bs_ro = bdrv_new("");
877
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
878
            backing_drv);
879
        if (ret < 0) {
880
            bdrv_delete(bs_ro);
881
            /* drive not functional anymore */
882
            bs->drv = NULL;
883
            return ret;
884
        }
885
        bs->backing_hd = bs_ro;
886
        bs->backing_hd->keep_read_only = 0;
887
    }
888

    
889
    return ret;
890
}
891

    
892
void bdrv_commit_all(void)
893
{
894
    BlockDriverState *bs;
895

    
896
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
897
        bdrv_commit(bs);
898
    }
899
}
900

    
901
/*
902
 * Return values:
903
 * 0        - success
904
 * -EINVAL  - backing format specified, but no file
905
 * -ENOSPC  - can't update the backing file because no space is left in the
906
 *            image file header
907
 * -ENOTSUP - format driver doesn't support changing the backing file
908
 */
909
int bdrv_change_backing_file(BlockDriverState *bs,
910
    const char *backing_file, const char *backing_fmt)
911
{
912
    BlockDriver *drv = bs->drv;
913

    
914
    if (drv->bdrv_change_backing_file != NULL) {
915
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
916
    } else {
917
        return -ENOTSUP;
918
    }
919
}
920

    
921
/*
 * Validate a byte-granularity request against the device.
 *
 * Returns -ENOMEDIUM if no medium is inserted and 0 for growable devices.
 * Otherwise returns -EIO unless [offset, offset + size) lies within the
 * length reported by bdrv_getlength(), in which case it returns 0.
 */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (bs->growable) {
        /* growable devices accept any offset */
        return 0;
    }

    len = bdrv_getlength(bs);

    if (offset < 0 || offset > len || len - offset < size) {
        return -EIO;
    }

    return 0;
}
942

    
943
/*
 * Sector-granularity wrapper around bdrv_check_byte_request(): both
 * arguments are scaled by BDRV_SECTOR_SIZE before the check.
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    const int64_t offset = sector_num * BDRV_SECTOR_SIZE;
    const size_t size = nb_sectors * BDRV_SECTOR_SIZE;

    return bdrv_check_byte_request(bs, offset, size);
}
949

    
950
/*
 * Return true unless both the coroutine read entry point and the AIO read
 * entry point of drv are this file's emulation functions.
 */
static inline bool bdrv_has_async_rw(BlockDriver *drv)
{
    bool own_co_readv = (drv->bdrv_co_readv != bdrv_co_readv_em);
    bool own_aio_readv = (drv->bdrv_aio_readv != bdrv_aio_readv_em);

    return own_co_readv || own_aio_readv;
}
955

    
956
/* Return true if drv's AIO flush is not the emulation from this file. */
static inline bool bdrv_has_async_flush(BlockDriver *drv)
{
    return !(drv->bdrv_aio_flush == bdrv_aio_flush_em);
}
960

    
961
/* return < 0 if error. See bdrv_write() for the return codes */
962
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
963
              uint8_t *buf, int nb_sectors)
964
{
965
    BlockDriver *drv = bs->drv;
966

    
967
    if (!drv)
968
        return -ENOMEDIUM;
969

    
970
    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
971
        QEMUIOVector qiov;
972
        struct iovec iov = {
973
            .iov_base = (void *)buf,
974
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
975
        };
976

    
977
        qemu_iovec_init_external(&qiov, &iov, 1);
978
        return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
979
    }
980

    
981
    if (bdrv_check_request(bs, sector_num, nb_sectors))
982
        return -EIO;
983

    
984
    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
985
}
986

    
987
/*
 * Mark (dirty != 0) or clear (dirty == 0) the dirty bit of every chunk
 * touched by the sector range [sector_num, sector_num + nb_sectors).
 *
 * One bit of bs->dirty_bitmap covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors.
 * bs->dirty_count is adjusted only for bits whose value actually changes.
 */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    const size_t bits_per_word = sizeof(unsigned long) * 8;
    int64_t chunk, last_chunk;

    chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    last_chunk = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    while (chunk <= last_chunk) {
        unsigned long idx = chunk / bits_per_word;
        unsigned long bit = chunk % bits_per_word;
        unsigned long mask = 1UL << bit;
        unsigned long val = bs->dirty_bitmap[idx];

        if (dirty) {
            if ((val & mask) == 0) {
                bs->dirty_count++;
                val |= mask;
            }
        } else if ((val & mask) != 0) {
            bs->dirty_count--;
            val &= ~mask;
        }

        bs->dirty_bitmap[idx] = val;
        chunk++;
    }
}
1014

    
1015
/* Return < 0 if error. Important errors are:
1016
  -EIO         generic I/O error (may happen for all errors)
1017
  -ENOMEDIUM   No media inserted.
1018
  -EINVAL      Invalid sector number or nb_sectors
1019
  -EACCES      Trying to write a read-only device
1020
*/
1021
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1022
               const uint8_t *buf, int nb_sectors)
1023
{
1024
    BlockDriver *drv = bs->drv;
1025

    
1026
    if (!bs->drv)
1027
        return -ENOMEDIUM;
1028

    
1029
    if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1030
        QEMUIOVector qiov;
1031
        struct iovec iov = {
1032
            .iov_base = (void *)buf,
1033
            .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1034
        };
1035

    
1036
        qemu_iovec_init_external(&qiov, &iov, 1);
1037
        return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1038
    }
1039

    
1040
    if (bs->read_only)
1041
        return -EACCES;
1042
    if (bdrv_check_request(bs, sector_num, nb_sectors))
1043
        return -EIO;
1044

    
1045
    if (bs->dirty_bitmap) {
1046
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1047
    }
1048

    
1049
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1050
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
1051
    }
1052

    
1053
    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1054
}
1055

    
1056
/*
 * Byte-granularity read: copy 'count1' bytes starting at byte 'offset' of the
 * device into 'buf'.
 *
 * The request is split into up to three parts: an unaligned head (read via a
 * bounce sector), a run of whole sectors read straight into the caller's
 * buffer, and an unaligned tail (again via the bounce sector).
 *
 * Returns 'count1' on success or the negative value returned by the failing
 * bdrv_read() call.
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    /* Byte cursor into the caller's buffer; arithmetic on 'void *' is a GNU
     * extension, so advance a uint8_t pointer instead. */
    uint8_t *dst = buf;
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count) {
        len = count;
    }
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0) {
            return count1;
        }
        sector_num++;
        dst += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        ret = bdrv_read(bs, sector_num, dst, nb_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        dst += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(dst, tmp_buf, count);
    }
    return count1;
}
1100

    
1101
/*
 * Byte-granularity write: store 'count1' bytes from 'buf' at byte 'offset' of
 * the device.
 *
 * Unaligned head and tail pieces are handled by read-modify-write of a single
 * bounce sector; the aligned middle is written straight from the caller's
 * buffer.
 *
 * Returns 'count1' on success or the negative value returned by the failing
 * bdrv_read()/bdrv_write() call.
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    /* Byte cursor into the caller's buffer; arithmetic on 'void *' is a GNU
     * extension, so advance a uint8_t pointer instead. */
    const uint8_t *src = buf;
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count) {
        len = count;
    }
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), src, len);
        ret = bdrv_write(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        count -= len;
        if (count == 0) {
            return count1;
        }
        sector_num++;
        src += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        ret = bdrv_write(bs, sector_num, src, nb_sectors);
        if (ret < 0) {
            return ret;
        }
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        src += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        ret = bdrv_read(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
        memcpy(tmp_buf, src, count);
        ret = bdrv_write(bs, sector_num, tmp_buf, 1);
        if (ret < 0) {
            return ret;
        }
    }
    return count1;
}
1149

    
1150
/*
1151
 * Writes to the file and ensures that no writes are reordered across this
1152
 * request (acts as a barrier)
1153
 *
1154
 * Returns 0 on success, -errno in error cases.
1155
 */
1156
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1157
    const void *buf, int count)
1158
{
1159
    int ret;
1160

    
1161
    ret = bdrv_pwrite(bs, offset, buf, count);
1162
    if (ret < 0) {
1163
        return ret;
1164
    }
1165

    
1166
    /* No flush needed for cache=writethrough, it uses O_DSYNC */
1167
    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1168
        bdrv_flush(bs);
1169
    }
1170

    
1171
    return 0;
1172
}
1173

    
1174
/*
 * Coroutine vectored read of 'nb_sectors' sectors at 'sector_num' into 'qiov'.
 *
 * The trace point fires before any validation. Returns -ENOMEDIUM without a
 * driver, -EIO when bdrv_check_request() rejects the range, otherwise the
 * result of the driver's bdrv_co_readv callback.
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *driver = bs->drv;

    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    if (driver == NULL) {
        return -ENOMEDIUM;
    }

    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    return driver->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
}
1190

    
1191
/*
 * Coroutine vectored write of 'nb_sectors' sectors at 'sector_num' from
 * 'qiov'.
 *
 * Returns -ENOMEDIUM without a driver, -EACCES on a read-only device and -EIO
 * when bdrv_check_request() rejects the range. On the success path the dirty
 * bitmap (if any) and bs->wr_highest_sector are updated before the request is
 * handed to the driver's bdrv_co_writev callback.
 */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *driver = bs->drv;
    int64_t last_sector;

    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    if (driver == NULL) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors) != 0) {
        return -EIO;
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    /* Remember the highest sector ever written. */
    last_sector = sector_num + nb_sectors - 1;
    if (bs->wr_highest_sector < last_sector) {
        bs->wr_highest_sector = last_sector;
    }

    return driver->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
1218

    
1219
/**
1220
 * Truncate file to 'offset' bytes (needed only for file protocols)
1221
 */
1222
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1223
{
1224
    BlockDriver *drv = bs->drv;
1225
    int ret;
1226
    if (!drv)
1227
        return -ENOMEDIUM;
1228
    if (!drv->bdrv_truncate)
1229
        return -ENOTSUP;
1230
    if (bs->read_only)
1231
        return -EACCES;
1232
    if (bdrv_in_use(bs))
1233
        return -EBUSY;
1234
    ret = drv->bdrv_truncate(bs, offset);
1235
    if (ret == 0) {
1236
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1237
        if (bs->change_cb) {
1238
            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1239
        }
1240
    }
1241
    return ret;
1242
}
1243

    
1244
/**
1245
 * Length of a allocated file in bytes. Sparse files are counted by actual
1246
 * allocated space. Return < 0 if error or unknown.
1247
 */
1248
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1249
{
1250
    BlockDriver *drv = bs->drv;
1251
    if (!drv) {
1252
        return -ENOMEDIUM;
1253
    }
1254
    if (drv->bdrv_get_allocated_file_size) {
1255
        return drv->bdrv_get_allocated_file_size(bs);
1256
    }
1257
    if (bs->file) {
1258
        return bdrv_get_allocated_file_size(bs->file);
1259
    }
1260
    return -ENOTSUP;
1261
}
1262

    
1263
/**
1264
 * Length of a file in bytes. Return < 0 if error or unknown.
1265
 */
1266
int64_t bdrv_getlength(BlockDriverState *bs)
1267
{
1268
    BlockDriver *drv = bs->drv;
1269
    if (!drv)
1270
        return -ENOMEDIUM;
1271

    
1272
    if (bs->growable || bs->removable) {
1273
        if (drv->bdrv_getlength) {
1274
            return drv->bdrv_getlength(bs);
1275
        }
1276
    }
1277
    return bs->total_sectors * BDRV_SECTOR_SIZE;
1278
}
1279

    
1280
/* return 0 as number of sectors if no device present or error */
1281
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1282
{
1283
    int64_t length;
1284
    length = bdrv_getlength(bs);
1285
    if (length < 0)
1286
        length = 0;
1287
    else
1288
        length = length >> BDRV_SECTOR_BITS;
1289
    *nb_sectors_ptr = length;
1290
}
1291

    
1292
/*
 * One 16-byte entry of the MSDOS partition table, laid out exactly as it
 * appears on disk (hence the packed attribute). The two 32-bit fields are
 * converted with le32_to_cpu() by the code that reads them.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
1304

    
1305
/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1306
static int guess_disk_lchs(BlockDriverState *bs,
1307
                           int *pcylinders, int *pheads, int *psectors)
1308
{
1309
    uint8_t buf[BDRV_SECTOR_SIZE];
1310
    int ret, i, heads, sectors, cylinders;
1311
    struct partition *p;
1312
    uint32_t nr_sects;
1313
    uint64_t nb_sectors;
1314

    
1315
    bdrv_get_geometry(bs, &nb_sectors);
1316

    
1317
    ret = bdrv_read(bs, 0, buf, 1);
1318
    if (ret < 0)
1319
        return -1;
1320
    /* test msdos magic */
1321
    if (buf[510] != 0x55 || buf[511] != 0xaa)
1322
        return -1;
1323
    for(i = 0; i < 4; i++) {
1324
        p = ((struct partition *)(buf + 0x1be)) + i;
1325
        nr_sects = le32_to_cpu(p->nr_sects);
1326
        if (nr_sects && p->end_head) {
1327
            /* We make the assumption that the partition terminates on
1328
               a cylinder boundary */
1329
            heads = p->end_head + 1;
1330
            sectors = p->end_sector & 63;
1331
            if (sectors == 0)
1332
                continue;
1333
            cylinders = nb_sectors / (heads * sectors);
1334
            if (cylinders < 1 || cylinders > 16383)
1335
                continue;
1336
            *pheads = heads;
1337
            *psectors = sectors;
1338
            *pcylinders = cylinders;
1339
#if 0
1340
            printf("guessed geometry: LCHS=%d %d %d\n",
1341
                   cylinders, heads, sectors);
1342
#endif
1343
            return 0;
1344
        }
1345
    }
1346
    return -1;
1347
}
1348

    
1349
/*
 * Determine the CHS geometry to use for 'bs' and store it through
 * pcyls/pheads/psecs.
 *
 * Order of preference:
 *  1. an existing geometry hint (non-zero cylinders), returned unchanged;
 *  2. a geometry guessed from the partition table with at most 16 heads; an
 *     AUTO translation hint is then switched to NONE;
 *  3. a default geometry of 16 heads and 63 sectors with the cylinder count
 *     clamped to 2..16383. If the partition table guess had more than 16
 *     heads, an AUTO translation hint is switched to LARGE or LBA.
 *
 * In cases 2 and 3 the result is recorded as the new geometry hint.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, guessed;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);

    /* if a geometry hint is available, use it */
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        return;
    }

    guessed = (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0);

    if (guessed && heads <= 16) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
        /* disable any translation to be in sync with
           the logical geometry */
        if (translation == BIOS_ATA_TRANSLATION_AUTO) {
            bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_NONE);
        }
    } else {
        /* Either nothing could be guessed, or heads > 16, which means that
           a BIOS LBA translation was active; in both cases use a standard
           physical disk geometry */
        cylinders = nb_sectors / (16 * 63);
        if (cylinders > 16383) {
            cylinders = 16383;
        } else if (cylinders < 2) {
            cylinders = 2;
        }
        *pcyls = cylinders;
        *pheads = 16;
        *psecs = 63;

        /* 'guessed' here implies the heads > 16 (LBA detected) case */
        if (guessed && translation == BIOS_ATA_TRANSLATION_AUTO) {
            if ((*pcyls * *pheads) <= 131072) {
                bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_LARGE);
            } else {
                bdrv_set_translation_hint(bs, BIOS_ATA_TRANSLATION_LBA);
            }
        }
    }

    bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
}
1407

    
1408
/* Record a cylinders/heads/sectors geometry hint on 'bs'. */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->secs = secs;
    bs->heads = heads;
    bs->cyls = cyls;
}
1415

    
1416
/* Record the BIOS translation hint on 'bs'. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
1420

    
1421
/*
 * Copy the stored geometry hint of 'bs' into *pcyls, *pheads and *psecs
 * (written in that order).
 */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    const int cyls = bs->cyls;
    const int heads = bs->heads;
    const int secs = bs->secs;

    *pcyls = cyls;
    *pheads = heads;
    *psecs = secs;
}
1428

    
1429
/* Recognize floppy formats */
1430
typedef struct FDFormat {
1431
    FDriveType drive;
1432
    uint8_t last_sect;
1433
    uint8_t max_track;
1434
    uint8_t max_head;
1435
} FDFormat;
1436

    
1437
static const FDFormat fd_formats[] = {
1438
    /* First entry is default format */
1439
    /* 1.44 MB 3"1/2 floppy disks */
1440
    { FDRIVE_DRV_144, 18, 80, 1, },
1441
    { FDRIVE_DRV_144, 20, 80, 1, },
1442
    { FDRIVE_DRV_144, 21, 80, 1, },
1443
    { FDRIVE_DRV_144, 21, 82, 1, },
1444
    { FDRIVE_DRV_144, 21, 83, 1, },
1445
    { FDRIVE_DRV_144, 22, 80, 1, },
1446
    { FDRIVE_DRV_144, 23, 80, 1, },
1447
    { FDRIVE_DRV_144, 24, 80, 1, },
1448
    /* 2.88 MB 3"1/2 floppy disks */
1449
    { FDRIVE_DRV_288, 36, 80, 1, },
1450
    { FDRIVE_DRV_288, 39, 80, 1, },
1451
    { FDRIVE_DRV_288, 40, 80, 1, },
1452
    { FDRIVE_DRV_288, 44, 80, 1, },
1453
    { FDRIVE_DRV_288, 48, 80, 1, },
1454
    /* 720 kB 3"1/2 floppy disks */
1455
    { FDRIVE_DRV_144,  9, 80, 1, },
1456
    { FDRIVE_DRV_144, 10, 80, 1, },
1457
    { FDRIVE_DRV_144, 10, 82, 1, },
1458
    { FDRIVE_DRV_144, 10, 83, 1, },
1459
    { FDRIVE_DRV_144, 13, 80, 1, },
1460
    { FDRIVE_DRV_144, 14, 80, 1, },
1461
    /* 1.2 MB 5"1/4 floppy disks */
1462
    { FDRIVE_DRV_120, 15, 80, 1, },
1463
    { FDRIVE_DRV_120, 18, 80, 1, },
1464
    { FDRIVE_DRV_120, 18, 82, 1, },
1465
    { FDRIVE_DRV_120, 18, 83, 1, },
1466
    { FDRIVE_DRV_120, 20, 80, 1, },
1467
    /* 720 kB 5"1/4 floppy disks */
1468
    { FDRIVE_DRV_120,  9, 80, 1, },
1469
    { FDRIVE_DRV_120, 11, 80, 1, },
1470
    /* 360 kB 5"1/4 floppy disks */
1471
    { FDRIVE_DRV_120,  9, 40, 1, },
1472
    { FDRIVE_DRV_120,  9, 40, 0, },
1473
    { FDRIVE_DRV_120, 10, 41, 1, },
1474
    { FDRIVE_DRV_120, 10, 42, 1, },
1475
    /* 320 kB 5"1/4 floppy disks */
1476
    { FDRIVE_DRV_120,  8, 40, 1, },
1477
    { FDRIVE_DRV_120,  8, 40, 0, },
1478
    /* 360 kB must match 5"1/4 better than 3"1/2... */
1479
    { FDRIVE_DRV_144,  9, 80, 0, },
1480
    /* end */
1481
    { FDRIVE_DRV_NONE, -1, -1, 0, },
1482
};
1483

    
1484
/*
 * Determine the floppy geometry for 'bs'.
 *
 * If a complete geometry hint (heads, tracks and sectors all non-zero) is
 * stored on 'bs', it is returned as is and '*drive' is set to 'drive_in'.
 *
 * Otherwise fd_formats[] is searched for an entry whose drive type is
 * compatible with 'drive_in' (FDRIVE_DRV_NONE accepts every entry) and whose
 * total sector count equals the image size. Without an exact size match the
 * first compatible entry is used.
 *
 * Outputs: *nb_heads (number of heads), *max_track, *last_sect and *drive are
 * written on every path.
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk: no table lookup, so report the caller's drive
         * type back instead of leaving '*drive' unwritten. */
        *drive = drive_in;
        return;
    }

    bdrv_get_geometry(bs, &nb_sectors);
    match = -1;
    first_match = -1;
    for (i = 0; ; i++) {
        parse = &fd_formats[i];
        if (parse->drive == FDRIVE_DRV_NONE) {
            break;
        }
        if (drive_in == parse->drive ||
            drive_in == FDRIVE_DRV_NONE) {
            size = (parse->max_head + 1) * parse->max_track *
                parse->last_sect;
            if (nb_sectors == size) {
                match = i;
                break;
            }
            if (first_match == -1) {
                first_match = i;
            }
        }
    }

    if (match == -1) {
        if (first_match == -1) {
            /* NOTE(review): the table comment says the first entry is the
             * default, yet index 1 is used here; kept as is, confirm
             * whether 0 was intended. */
            match = 1;
        } else {
            match = first_match;
        }
        parse = &fd_formats[match];
    }

    *nb_heads = parse->max_head + 1;
    *max_track = parse->max_track;
    *last_sect = parse->last_sect;
    *drive = parse->drive;
}
1531

    
1532
/* Return the BIOS translation hint stored on 'bs'. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    const int translation = bs->translation;

    return translation;
}
1536

    
1537
/* Configure the actions stored on 'bs' for read errors and write errors. */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_write_error = on_write_error;
    bs->on_read_error = on_read_error;
}
1543

    
1544
/*
 * Return the configured error action of 'bs': the read-error action when
 * 'is_read' is non-zero, the write-error action otherwise.
 */
BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    if (is_read) {
        return bs->on_read_error;
    }
    return bs->on_write_error;
}
1548

    
1549
/*
 * Set the removable flag of 'bs'. If 'bs' becomes removable while it is the
 * device recorded in bs_snapshots, that record is reset to NULL.
 */
void bdrv_set_removable(BlockDriverState *bs, int removable)
{
    bs->removable = removable;

    if (!removable) {
        return;
    }
    if (bs_snapshots == bs) {
        bs_snapshots = NULL;
    }
}
1556

    
1557
/* Return the removable flag of 'bs'. */
int bdrv_is_removable(BlockDriverState *bs)
{
    const int removable = bs->removable;

    return removable;
}
1561

    
1562
/* Return the read-only flag of 'bs'. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    const int read_only = bs->read_only;

    return read_only;
}
1566

    
1567
/* Return the 'sg' flag of 'bs'. */
int bdrv_is_sg(BlockDriverState *bs)
{
    const int sg = bs->sg;

    return sg;
}
1571

    
1572
/* Return the enable_write_cache flag of 'bs'. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    const int enabled = bs->enable_write_cache;

    return enabled;
}
1576

    
1577
/* XXX: no longer used */
1578
void bdrv_set_change_cb(BlockDriverState *bs,
1579
                        void (*change_cb)(void *opaque, int reason),
1580
                        void *opaque)
1581
{
1582
    bs->change_cb = change_cb;
1583
    bs->change_opaque = opaque;
1584
}
1585

    
1586
/*
 * Return 1 if the backing device of 'bs' is encrypted; otherwise return the
 * encrypted flag of 'bs' itself.
 */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted) {
        return 1;
    }

    return bs->encrypted;
}
1592

    
1593
/*
 * Return non-zero if an encryption key still has to be supplied: either the
 * backing device or 'bs' itself is encrypted and has no valid key yet.
 */
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing = bs->backing_hd;

    if (backing != NULL && backing->encrypted && !backing->valid_key) {
        return 1;
    }

    if (!bs->encrypted) {
        return 0;
    }
    return !bs->valid_key;
}
1601

    
1602
/*
 * Supply the encryption key 'key' for 'bs'.
 *
 * An encrypted backing device receives the key first; its failure is returned
 * immediately, and if 'bs' itself is not encrypted the call ends there with 0.
 *
 * Returns -EINVAL if 'bs' is not encrypted, -ENOMEDIUM if there is no driver
 * or the driver has no bdrv_set_key callback, otherwise the driver's result.
 * A driver failure clears bs->valid_key. The first successful key marks the
 * key valid, flags a media change and invokes the change callback with
 * CHANGE_MEDIA.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    BlockDriverState *backing = bs->backing_hd;
    int ret;

    if (backing != NULL && backing->encrypted) {
        ret = bdrv_set_key(backing, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (bs->drv == NULL || bs->drv->bdrv_set_key == NULL) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }
    if (bs->valid_key) {
        /* Key was already valid: nothing further to announce. */
        return ret;
    }

    bs->valid_key = 1;
    /* call the change callback now, we skipped it on open */
    bs->media_changed = 1;
    if (bs->change_cb) {
        bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
    }
    return ret;
}
1629

    
1630
/*
 * Copy the format name of the driver attached to 'bs' into 'buf' (at most
 * 'buf_size' bytes, via pstrcpy). Without a driver 'buf' is set to the empty
 * string.
 */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (bs->drv) {
        pstrcpy(buf, buf_size, bs->drv->format_name);
        return;
    }

    buf[0] = '\0';
}
1638

    
1639
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1640
                         void *opaque)
1641
{
1642
    BlockDriver *drv;
1643

    
1644
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1645
        it(opaque, drv->format_name);
1646
    }
1647
}
1648

    
1649
BlockDriverState *bdrv_find(const char *name)
1650
{
1651
    BlockDriverState *bs;
1652

    
1653
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1654
        if (!strcmp(name, bs->device_name)) {
1655
            return bs;
1656
        }
1657
    }
1658
    return NULL;
1659
}
1660

    
1661
/*
 * Iterator over bdrv_states: pass NULL to obtain the first device, or a
 * device to obtain the one following it in the list.
 */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    return bs ? QTAILQ_NEXT(bs, list) : QTAILQ_FIRST(&bdrv_states);
}
1668

    
1669
/* Invoke 'it' with 'opaque' for every device on the bdrv_states list. */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *cur;

    for (cur = QTAILQ_FIRST(&bdrv_states); cur; cur = QTAILQ_NEXT(cur, list)) {
        it(opaque, cur);
    }
}
1677

    
1678
/* Return the device name stored in 'bs' (storage owned by 'bs'). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    const char *name = bs->device_name;

    return name;
}
1682

    
1683
/*
 * Flush 'bs'.
 *
 * Returns 0 immediately when BDRV_O_NO_FLUSH is set in the open flags. With a
 * driver attached, bdrv_co_flush_em() is used when bdrv_has_async_flush()
 * holds and we are inside a coroutine; otherwise the driver's bdrv_flush
 * callback is used if it exists. In every remaining case 0 is returned.
 */
int bdrv_flush(BlockDriverState *bs)
{
    BlockDriver *driver;

    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    driver = bs->drv;
    if (driver != NULL) {
        if (bdrv_has_async_flush(driver) && qemu_in_coroutine()) {
            return bdrv_co_flush_em(bs);
        }
        if (driver->bdrv_flush) {
            return driver->bdrv_flush(bs);
        }
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and therefore don't support bdrv_flush. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}
1709

    
1710
void bdrv_flush_all(void)
1711
{
1712
    BlockDriverState *bs;
1713

    
1714
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1715
        if (bs->drv && !bdrv_is_read_only(bs) &&
1716
            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1717
            bdrv_flush(bs);
1718
        }
1719
    }
1720
}
1721

    
1722
/*
 * Query the driver's bdrv_has_zero_init callback; drivers without the
 * callback are reported as 1. 'bs' must have a driver attached (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (!bs->drv->bdrv_has_zero_init) {
        return 1;
    }

    return bs->drv->bdrv_has_zero_init(bs);
}
1732

    
1733
/*
 * Pass a discard request for 'nb_sectors' sectors at 'sector_num' to the
 * driver. Returns -ENOMEDIUM without a driver and 0 (nothing done) when the
 * driver has no bdrv_discard callback; otherwise the callback's result.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    BlockDriver *driver = bs->drv;

    if (driver == NULL) {
        return -ENOMEDIUM;
    }

    return driver->bdrv_discard
        ? driver->bdrv_discard(bs, sector_num, nb_sectors)
        : 0;
}
1743

    
1744
/*
1745
 * Returns true iff the specified sector is present in the disk image. Drivers
1746
 * not implementing the functionality are assumed to not support backing files,
1747
 * hence all their sectors are reported as allocated.
1748
 *
1749
 * 'pnum' is set to the number of sectors (including and immediately following
1750
 * the specified sector) that are known to be in the same
1751
 * allocated/unallocated state.
1752
 *
1753
 * 'nb_sectors' is the max value 'pnum' should be set to.
1754
 */
1755
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1756
        int *pnum)
1757
{
1758
    int64_t n;
1759
    if (!bs->drv->bdrv_is_allocated) {
1760
        if (sector_num >= bs->total_sectors) {
1761
            *pnum = 0;
1762
            return 0;
1763
        }
1764
        n = bs->total_sectors - sector_num;
1765
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1766
        return 1;
1767
    }
1768
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1769
}
1770

    
1771
void bdrv_mon_event(const BlockDriverState *bdrv,
1772
                    BlockMonEventAction action, int is_read)
1773
{
1774
    QObject *data;
1775
    const char *action_str;
1776

    
1777
    switch (action) {
1778
    case BDRV_ACTION_REPORT:
1779
        action_str = "report";
1780
        break;
1781
    case BDRV_ACTION_IGNORE:
1782
        action_str = "ignore";
1783
        break;
1784
    case BDRV_ACTION_STOP:
1785
        action_str = "stop";
1786
        break;
1787
    default:
1788
        abort();
1789
    }
1790

    
1791
    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1792
                              bdrv->device_name,
1793
                              action_str,
1794
                              is_read ? "read" : "write");
1795
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1796

    
1797
    qobject_decref(data);
1798
}
1799

    
1800
/*
 * qlist_iter() callback: print one block device description ('obj', a
 * dictionary) on the monitor passed as 'opaque'. The output is a single line
 * with the device name, removable/locked state and, if media is inserted,
 * file, optional backing file, read-only flag, driver and encryption state.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    Monitor *mon = opaque;
    QDict *bs_dict = qobject_to_qdict(obj);
    QDict *inserted;

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_bool(bs_dict, "removable"));

    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
    }

    if (!qdict_haskey(bs_dict, "inserted")) {
        monitor_printf(mon, " [not inserted]");
    } else {
        inserted = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(inserted, "file"));
        if (qdict_haskey(inserted, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon,
                                   qdict_get_str(inserted, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(inserted, "ro"),
                            qdict_get_str(inserted, "drv"),
                            qdict_get_bool(inserted, "encrypted"));
    }

    monitor_printf(mon, "\n");
}
1834

    
1835
/*
 * Print the list 'data' on 'mon', one line per element, by running
 * bdrv_print_dict() over it.
 */
void bdrv_info_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_print_dict, mon);
}
1839

    
1840
/*
 * Build a list describing every device on bdrv_states and store it in
 * '*ret_data'.
 *
 * Each element is a dictionary with 'device', 'type', 'removable' and
 * 'locked'. Devices with a driver attached additionally carry an 'inserted'
 * dictionary ('file', 'ro', 'drv', 'encrypted' and, when the backing file
 * name is non-empty, 'backing_file'). 'mon' is not used.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *entry;

        entry = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                   "'removable': %i, 'locked': %i }",
                                   bs->device_name, bs->removable,
                                   bs->locked);

        if (bs->drv) {
            QObject *inserted;

            inserted = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                          "'encrypted': %i }",
                                          bs->filename, bs->read_only,
                                          bs->drv->format_name,
                                          bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *inserted_dict = qobject_to_qdict(inserted);

                qdict_put(inserted_dict, "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(qobject_to_qdict(entry), "inserted", inserted);
        }

        qlist_append_obj(bs_list, entry);
    }

    *ret_data = QOBJECT(bs_list);
}
1877

    
1878
/*
 * qlist_iter() callback: print the statistics of one device ('data', a
 * dictionary with 'device' and 'stats' entries) on the monitor passed as
 * 'opaque'.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    Monitor *mon = opaque;
    QDict *device = qobject_to_qdict(data);
    QDict *stats;

    monitor_printf(mon, "%s:", qdict_get_str(device, "device"));

    stats = qobject_to_qdict(qdict_get(device, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        "\n",
                        qdict_get_int(stats, "rd_bytes"),
                        qdict_get_int(stats, "wr_bytes"),
                        qdict_get_int(stats, "rd_operations"),
                        qdict_get_int(stats, "wr_operations"));
}
1897

    
1898
/*
 * Print the statistics list 'data' on 'mon' by running bdrv_stats_iter()
 * over every element.
 */
void bdrv_stats_print(Monitor *mon, const QObject *data)
{
    QList *devices = qobject_to_qlist(data);

    qlist_iter(devices, bdrv_stats_iter, mon);
}
1902

    
1903
/*
 * Build the statistics object for 'bs': a dictionary with a 'stats' entry
 * (byte and operation counters plus the highest written offset in bytes), a
 * 'device' entry when the device name is non-empty, and a recursively built
 * 'parent' entry for bs->file when present. The caller owns the result.
 */
static QObject *bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *stats_obj;
    QDict *stats_dict;

    stats_obj = qobject_from_jsonf("{ 'stats': {"
                                   "'rd_bytes': %" PRId64 ","
                                   "'wr_bytes': %" PRId64 ","
                                   "'rd_operations': %" PRId64 ","
                                   "'wr_operations': %" PRId64 ","
                                   "'wr_highest_offset': %" PRId64
                                   "} }",
                                   bs->rd_bytes, bs->wr_bytes,
                                   bs->rd_ops, bs->wr_ops,
                                   bs->wr_highest_sector *
                                   (uint64_t)BDRV_SECTOR_SIZE);
    stats_dict = qobject_to_qdict(stats_obj);

    if (bs->device_name[0] != '\0') {
        qdict_put(stats_dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        qdict_put_obj(stats_dict, "parent", bdrv_info_stats_bs(bs->file));
    }

    return stats_obj;
}
1932

    
1933
/*
 * Collect the statistics object of every device on bdrv_states into a list
 * and store that list in '*ret_data'. 'mon' is not used.
 */
void bdrv_info_stats(Monitor *mon, QObject **ret_data)
{
    QList *devices = qlist_new();
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        qlist_append_obj(devices, bdrv_info_stats_bs(bs));
    }

    *ret_data = QOBJECT(devices);
}
1948

    
1949
/*
 * Return the name of the encrypted image associated with 'bs': the backing
 * file name if the backing device is encrypted, else the file name of 'bs'
 * if 'bs' is encrypted, else NULL.
 */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        return bs->backing_file;
    }

    return bs->encrypted ? bs->filename : NULL;
}
1958

    
1959
/*
 * Copy the backing file name of @bs into @filename (at most @filename_size
 * bytes, via pstrcpy()); an empty string is copied when bs->backing_file
 * tests false.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    const char *name = bs->backing_file ? bs->backing_file : "";

    pstrcpy(filename, filename_size, name);
}
1968

    
1969
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1970
                          const uint8_t *buf, int nb_sectors)
1971
{
1972
    BlockDriver *drv = bs->drv;
1973
    if (!drv)
1974
        return -ENOMEDIUM;
1975
    if (!drv->bdrv_write_compressed)
1976
        return -ENOTSUP;
1977
    if (bdrv_check_request(bs, sector_num, nb_sectors))
1978
        return -EIO;
1979

    
1980
    if (bs->dirty_bitmap) {
1981
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1982
    }
1983

    
1984
    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1985
}
1986

    
1987
/*
 * Zero @bdi and let the driver of @bs fill it in.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP if the driver has no
 * bdrv_get_info callback; @bdi is left untouched in both cases.
 */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        return -ENOTSUP;
    }

    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
1997

    
1998
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1999
                      int64_t pos, int size)
2000
{
2001
    BlockDriver *drv = bs->drv;
2002
    if (!drv)
2003
        return -ENOMEDIUM;
2004
    if (drv->bdrv_save_vmstate)
2005
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
2006
    if (bs->file)
2007
        return bdrv_save_vmstate(bs->file, buf, pos, size);
2008
    return -ENOTSUP;
2009
}
2010

    
2011
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2012
                      int64_t pos, int size)
2013
{
2014
    BlockDriver *drv = bs->drv;
2015
    if (!drv)
2016
        return -ENOMEDIUM;
2017
    if (drv->bdrv_load_vmstate)
2018
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
2019
    if (bs->file)
2020
        return bdrv_load_vmstate(bs->file, buf, pos, size);
2021
    return -ENOTSUP;
2022
}
2023

    
2024
/*
 * Forward @event to the driver's bdrv_debug_event callback.
 *
 * Does nothing when @bs has no driver or the driver does not implement
 * the callback.
 */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    /*
     * Call, then fall off the end: a return statement with an expression
     * is not permitted by ISO C in a function returning void.
     */
    drv->bdrv_debug_event(bs, event);
}
2035

    
2036
/**************************************************************/
2037
/* handling of snapshots */
2038

    
2039
/*
 * Return 1 if snapshots can be created on @bs, 0 otherwise.
 *
 * Devices without a driver, removable devices and read-only devices never
 * qualify. If the driver has no bdrv_snapshot_create callback, the answer
 * is taken from bs->file when there is one.
 */
int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (drv->bdrv_snapshot_create) {
        return 1;
    }

    return bs->file != NULL ? bdrv_can_snapshot(bs->file) : 0;
}
2055

    
2056
/* Return 1 if @bs was opened with BDRV_O_SNAPSHOT set, 0 otherwise. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) != 0;
}
2060

    
2061
BlockDriverState *bdrv_snapshots(void)
2062
{
2063
    BlockDriverState *bs;
2064

    
2065
    if (bs_snapshots) {
2066
        return bs_snapshots;
2067
    }
2068

    
2069
    bs = NULL;
2070
    while ((bs = bdrv_next(bs))) {
2071
        if (bdrv_can_snapshot(bs)) {
2072
            bs_snapshots = bs;
2073
            return bs;
2074
        }
2075
    }
2076
    return NULL;
2077
}
2078

    
2079
int bdrv_snapshot_create(BlockDriverState *bs,
2080
                         QEMUSnapshotInfo *sn_info)
2081
{
2082
    BlockDriver *drv = bs->drv;
2083
    if (!drv)
2084
        return -ENOMEDIUM;
2085
    if (drv->bdrv_snapshot_create)
2086
        return drv->bdrv_snapshot_create(bs, sn_info);
2087
    if (bs->file)
2088
        return bdrv_snapshot_create(bs->file, sn_info);
2089
    return -ENOTSUP;
2090
}
2091

    
2092
/*
 * Revert @bs to the snapshot identified by @snapshot_id.
 *
 * If the driver implements bdrv_snapshot_goto, it is used directly.
 * Otherwise, when bs->file exists, the driver is closed, the snapshot is
 * applied to bs->file, and the driver is reopened with the original open
 * flags. A failed reopen deletes bs->file, clears bs->drv and returns the
 * reopen error; otherwise the result of the recursive goto is returned.
 *
 * Returns -ENOMEDIUM without a driver and -ENOTSUP when neither path is
 * available.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int goto_ret, reopen_ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_goto) {
        return drv->bdrv_snapshot_goto(bs, snapshot_id);
    }
    if (!bs->file) {
        return -ENOTSUP;
    }

    drv->bdrv_close(bs);
    goto_ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    reopen_ret = drv->bdrv_open(bs, bs->open_flags);
    if (reopen_ret < 0) {
        /* NOTE(review): bs->file is not cleared after deletion - confirm
         * that no caller dereferences it once bs->drv is NULL. */
        bdrv_delete(bs->file);
        bs->drv = NULL;
        return reopen_ret;
    }
    return goto_ret;
}
2117

    
2118
/*
 * Delete the snapshot identified by @snapshot_id.
 *
 * Uses the driver's bdrv_snapshot_delete callback when present, otherwise
 * retries on bs->file. Returns -ENOMEDIUM without a driver and -ENOTSUP
 * when neither path is available.
 */
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_snapshot_delete) {
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    }
    if (bs->file) {
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    }
    return -ENOTSUP;
}
2129

    
2130
int bdrv_snapshot_list(BlockDriverState *bs,
2131
                       QEMUSnapshotInfo **psn_info)
2132
{
2133
    BlockDriver *drv = bs->drv;
2134
    if (!drv)
2135
        return -ENOMEDIUM;
2136
    if (drv->bdrv_snapshot_list)
2137
        return drv->bdrv_snapshot_list(bs, psn_info);
2138
    if (bs->file)
2139
        return bdrv_snapshot_list(bs->file, psn_info);
2140
    return -ENOTSUP;
2141
}
2142

    
2143
/*
 * Ask the driver to load the snapshot named @snapshot_name via its
 * bdrv_snapshot_load_tmp callback.
 *
 * Returns -ENOMEDIUM without a driver, -EINVAL if @bs is not read-only,
 * and -ENOTSUP if the driver lacks the callback. Unlike the other snapshot
 * helpers in this file, there is no fallback to bs->file.
 */
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (!drv->bdrv_snapshot_load_tmp) {
        return -ENOTSUP;
    }
    return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
2158

    
2159
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a compact form and
 * return @buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * shown while the scaled value is below 10, a rounded integer otherwise.
 * Anything too large for the last suffix is still printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Adding half a unit rounds to the nearest integer. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2188

    
2189
/*
 * Format one line of the snapshot table into @buf (capacity @buf_size)
 * and return @buf.
 *
 * With @sn == NULL the column header is written. Otherwise the line holds
 * the snapshot id, name, VM state size (via get_human_readable_size()),
 * creation date in local time and VM clock as hh:mm:ss.mmm.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char size_str[128], date_str[128], clock_str[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t when;
    int64_t total_secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
        return buf;
    }

    when = sn->date_sec;
#ifdef _WIN32
    ptm = localtime(&when);
    strftime(date_str, sizeof(date_str),
             "%Y-%m-%d %H:%M:%S", ptm);
#else
    localtime_r(&when, &tm);
    strftime(date_str, sizeof(date_str),
             "%Y-%m-%d %H:%M:%S", &tm);
#endif

    total_secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_str, sizeof(clock_str),
             "%02d:%02d:%02d.%03d",
             (int)(total_secs / 3600),
             (int)((total_secs / 60) % 60),
             (int)(total_secs % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));

    get_human_readable_size(size_str, sizeof(size_str), sn->vm_state_size);
    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             size_str,
             date_str,
             clock_str);
    return buf;
}
2231

    
2232

    
2233
/**************************************************************/
2234
/* async I/Os */
2235

    
2236
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2237
                                 QEMUIOVector *qiov, int nb_sectors,
2238
                                 BlockDriverCompletionFunc *cb, void *opaque)
2239
{
2240
    BlockDriver *drv = bs->drv;
2241
    BlockDriverAIOCB *ret;
2242

    
2243
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2244

    
2245
    if (!drv)
2246
        return NULL;
2247
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2248
        return NULL;
2249

    
2250
    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2251
                              cb, opaque);
2252

    
2253
    if (ret) {
2254
        /* Update stats even though technically transfer has not happened. */
2255
        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2256
        bs->rd_ops ++;
2257
    }
2258

    
2259
    return ret;
2260
}
2261

    
2262
typedef struct BlockCompleteData {
2263
    BlockDriverCompletionFunc *cb;
2264
    void *opaque;
2265
    BlockDriverState *bs;
2266
    int64_t sector_num;
2267
    int nb_sectors;
2268
} BlockCompleteData;
2269

    
2270
static void block_complete_cb(void *opaque, int ret)
2271
{
2272
    BlockCompleteData *b = opaque;
2273

    
2274
    if (b->bs->dirty_bitmap) {
2275
        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2276
    }
2277
    b->cb(b->opaque, ret);
2278
    g_free(b);
2279
}
2280

    
2281
static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2282
                                             int64_t sector_num,
2283
                                             int nb_sectors,
2284
                                             BlockDriverCompletionFunc *cb,
2285
                                             void *opaque)
2286
{
2287
    BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2288

    
2289
    blkdata->bs = bs;
2290
    blkdata->cb = cb;
2291
    blkdata->opaque = opaque;
2292
    blkdata->sector_num = sector_num;
2293
    blkdata->nb_sectors = nb_sectors;
2294

    
2295
    return blkdata;
2296
}
2297

    
2298
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2299
                                  QEMUIOVector *qiov, int nb_sectors,
2300
                                  BlockDriverCompletionFunc *cb, void *opaque)
2301
{
2302
    BlockDriver *drv = bs->drv;
2303
    BlockDriverAIOCB *ret;
2304
    BlockCompleteData *blk_cb_data;
2305

    
2306
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2307

    
2308
    if (!drv)
2309
        return NULL;
2310
    if (bs->read_only)
2311
        return NULL;
2312
    if (bdrv_check_request(bs, sector_num, nb_sectors))
2313
        return NULL;
2314

    
2315
    if (bs->dirty_bitmap) {
2316
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2317
                                         opaque);
2318
        cb = &block_complete_cb;
2319
        opaque = blk_cb_data;
2320
    }
2321

    
2322
    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2323
                               cb, opaque);
2324

    
2325
    if (ret) {
2326
        /* Update stats even though technically transfer has not happened. */
2327
        bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2328
        bs->wr_ops ++;
2329
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2330
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2331
        }
2332
    }
2333

    
2334
    return ret;
2335
}
2336

    
2337

    
2338
typedef struct MultiwriteCB {
2339
    int error;
2340
    int num_requests;
2341
    int num_callbacks;
2342
    struct {
2343
        BlockDriverCompletionFunc *cb;
2344
        void *opaque;
2345
        QEMUIOVector *free_qiov;
2346
        void *free_buf;
2347
    } callbacks[];
2348
} MultiwriteCB;
2349

    
2350
/*
 * Call every caller callback recorded in @mcb with mcb->error, releasing
 * the merged qiov and zero-fill buffer attached to each entry afterwards.
 */
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int idx;

    for (idx = 0; idx < mcb->num_callbacks; idx++) {
        QEMUIOVector *merged = mcb->callbacks[idx].free_qiov;

        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);

        if (merged) {
            qemu_iovec_destroy(merged);
        }
        g_free(merged);
        qemu_vfree(mcb->callbacks[idx].free_buf);
    }
}
2363

    
2364
static void multiwrite_cb(void *opaque, int ret)
2365
{
2366
    MultiwriteCB *mcb = opaque;
2367

    
2368
    trace_multiwrite_cb(mcb, ret);
2369

    
2370
    if (ret < 0 && !mcb->error) {
2371
        mcb->error = ret;
2372
    }
2373

    
2374
    mcb->num_requests--;
2375
    if (mcb->num_requests == 0) {
2376
        multiwrite_user_cb(mcb);
2377
        g_free(mcb);
2378
    }
2379
}
2380

    
2381
static int multiwrite_req_compare(const void *a, const void *b)
2382
{
2383
    const BlockRequest *req1 = a, *req2 = b;
2384

    
2385
    /*
2386
     * Note that we can't simply subtract req2->sector from req1->sector
2387
     * here as that could overflow the return value.
2388
     */
2389
    if (req1->sector > req2->sector) {
2390
        return 1;
2391
    } else if (req1->sector < req2->sector) {
2392
        return -1;
2393
    } else {
2394
        return 0;
2395
    }
2396
}
2397

    
2398
/*
2399
 * Takes a bunch of requests and tries to merge them. Returns the number of
2400
 * requests that remain after merging.
2401
 */
2402
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2403
    int num_reqs, MultiwriteCB *mcb)
2404
{
2405
    int i, outidx;
2406

    
2407
    // Sort requests by start sector
2408
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2409

    
2410
    // Check if adjacent requests touch the same clusters. If so, combine them,
2411
    // filling up gaps with zero sectors.
2412
    outidx = 0;
2413
    for (i = 1; i < num_reqs; i++) {
2414
        int merge = 0;
2415
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2416

    
2417
        // This handles the cases that are valid for all block drivers, namely
2418
        // exactly sequential writes and overlapping writes.
2419
        if (reqs[i].sector <= oldreq_last) {
2420
            merge = 1;
2421
        }
2422

    
2423
        // The block driver may decide that it makes sense to combine requests
2424
        // even if there is a gap of some sectors between them. In this case,
2425
        // the gap is filled with zeros (therefore only applicable for yet
2426
        // unused space in format like qcow2).
2427
        if (!merge && bs->drv->bdrv_merge_requests) {
2428
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2429
        }
2430

    
2431
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2432
            merge = 0;
2433
        }
2434

    
2435
        if (merge) {
2436
            size_t size;
2437
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2438
            qemu_iovec_init(qiov,
2439
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2440

    
2441
            // Add the first request to the merged one. If the requests are
2442
            // overlapping, drop the last sectors of the first request.
2443
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2444
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2445

    
2446
            // We might need to add some zeros between the two requests
2447
            if (reqs[i].sector > oldreq_last) {
2448
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2449
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2450
                memset(buf, 0, zero_bytes);
2451
                qemu_iovec_add(qiov, buf, zero_bytes);
2452
                mcb->callbacks[i].free_buf = buf;
2453
            }
2454

    
2455
            // Add the second request
2456
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2457

    
2458
            reqs[outidx].nb_sectors = qiov->size >> 9;
2459
            reqs[outidx].qiov = qiov;
2460

    
2461
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2462
        } else {
2463
            outidx++;
2464
            reqs[outidx].sector     = reqs[i].sector;
2465
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2466
            reqs[outidx].qiov       = reqs[i].qiov;
2467
        }
2468
    }
2469

    
2470
    return outidx + 1;
2471
}
2472

    
2473
/*
2474
 * Submit multiple AIO write requests at once.
2475
 *
2476
 * On success, the function returns 0 and all requests in the reqs array have
2477
 * been submitted. In error case this function returns -1, and any of the
2478
 * requests may or may not be submitted yet. In particular, this means that the
2479
 * callback will be called for some of the requests, for others it won't. The
2480
 * caller must check the error field of the BlockRequest to wait for the right
2481
 * callbacks (if error != 0, no callback will be called).
2482
 *
2483
 * The implementation may modify the contents of the reqs array, e.g. to merge
2484
 * requests. However, the fields opaque and error are left unmodified as they
2485
 * are used to signal failure for a single request to the caller.
2486
 */
2487
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2488
{
2489
    BlockDriverAIOCB *acb;
2490
    MultiwriteCB *mcb;
2491
    int i;
2492

    
2493
    /* don't submit writes if we don't have a medium */
2494
    if (bs->drv == NULL) {
2495
        for (i = 0; i < num_reqs; i++) {
2496
            reqs[i].error = -ENOMEDIUM;
2497
        }
2498
        return -1;
2499
    }
2500

    
2501
    if (num_reqs == 0) {
2502
        return 0;
2503
    }
2504

    
2505
    // Create MultiwriteCB structure
2506
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2507
    mcb->num_requests = 0;
2508
    mcb->num_callbacks = num_reqs;
2509

    
2510
    for (i = 0; i < num_reqs; i++) {
2511
        mcb->callbacks[i].cb = reqs[i].cb;
2512
        mcb->callbacks[i].opaque = reqs[i].opaque;
2513
    }
2514

    
2515
    // Check for mergable requests
2516
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2517

    
2518
    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2519

    
2520
    /*
2521
     * Run the aio requests. As soon as one request can't be submitted
2522
     * successfully, fail all requests that are not yet submitted (we must
2523
     * return failure for all requests anyway)
2524
     *
2525
     * num_requests cannot be set to the right value immediately: If
2526
     * bdrv_aio_writev fails for some request, num_requests would be too high
2527
     * and therefore multiwrite_cb() would never recognize the multiwrite
2528
     * request as completed. We also cannot use the loop variable i to set it
2529
     * when the first request fails because the callback may already have been
2530
     * called for previously submitted requests. Thus, num_requests must be
2531
     * incremented for each request that is submitted.
2532
     *
2533
     * The problem that callbacks may be called early also means that we need
2534
     * to take care that num_requests doesn't become 0 before all requests are
2535
     * submitted - multiwrite_cb() would consider the multiwrite request
2536
     * completed. A dummy request that is "completed" by a manual call to
2537
     * multiwrite_cb() takes care of this.
2538
     */
2539
    mcb->num_requests = 1;
2540

    
2541
    // Run the aio requests
2542
    for (i = 0; i < num_reqs; i++) {
2543
        mcb->num_requests++;
2544
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2545
            reqs[i].nb_sectors, multiwrite_cb, mcb);
2546

    
2547
        if (acb == NULL) {
2548
            // We can only fail the whole thing if no request has been
2549
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2550
            // complete and report the error in the callback.
2551
            if (i == 0) {
2552
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2553
                goto fail;
2554
            } else {
2555
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2556
                multiwrite_cb(mcb, -EIO);
2557
                break;
2558
            }
2559
        }
2560
    }
2561

    
2562
    /* Complete the dummy request */
2563
    multiwrite_cb(mcb, 0);
2564

    
2565
    return 0;
2566

    
2567
fail:
2568
    for (i = 0; i < mcb->num_callbacks; i++) {
2569
        reqs[i].error = -EIO;
2570
    }
2571
    g_free(mcb);
2572
    return -1;
2573
}
2574

    
2575
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2576
        BlockDriverCompletionFunc *cb, void *opaque)
2577
{
2578
    BlockDriver *drv = bs->drv;
2579

    
2580
    trace_bdrv_aio_flush(bs, opaque);
2581

    
2582
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2583
        return bdrv_aio_noop_em(bs, cb, opaque);
2584
    }
2585

    
2586
    if (!drv)
2587
        return NULL;
2588
    return drv->bdrv_aio_flush(bs, cb, opaque);
2589
}
2590

    
2591
/* Cancel @acb through the cancel hook of the pool it belongs to. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    AIOPool *pool = acb->pool;

    pool->cancel(acb);
}
2595

    
2596

    
2597
/**************************************************************/
2598
/* async block device emulation */
2599

    
2600
typedef struct BlockDriverAIOCBSync {
2601
    BlockDriverAIOCB common;
2602
    QEMUBH *bh;
2603
    int ret;
2604
    /* vector translation state */
2605
    QEMUIOVector *qiov;
2606
    uint8_t *bounce;
2607
    int is_write;
2608
} BlockDriverAIOCBSync;
2609

    
2610
/*
 * Cancel hook of bdrv_em_aio_pool: drop the pending completion bottom
 * half and return the AIOCB to its pool. The completion callback is not
 * invoked.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);

    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    /*
     * bdrv_aio_bh_cb() is the only other place that frees the bounce
     * buffer, and it will no longer run for this request; free it here so
     * that a cancelled request does not leak it. bounce is NULL for
     * flush/no-op requests, which bdrv_aio_bh_cb() already passes to
     * qemu_vfree() as well.
     */
    qemu_vfree(acb->bounce);
    acb->bounce = NULL;

    qemu_aio_release(acb);
}
2618

    
2619
static AIOPool bdrv_em_aio_pool = {
2620
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2621
    .cancel             = bdrv_aio_cancel_em,
2622
};
2623

    
2624
static void bdrv_aio_bh_cb(void *opaque)
2625
{
2626
    BlockDriverAIOCBSync *acb = opaque;
2627

    
2628
    if (!acb->is_write)
2629
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2630
    qemu_vfree(acb->bounce);
2631
    acb->common.cb(acb->common.opaque, acb->ret);
2632
    qemu_bh_delete(acb->bh);
2633
    acb->bh = NULL;
2634
    qemu_aio_release(acb);
2635
}
2636

    
2637
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2638
                                            int64_t sector_num,
2639
                                            QEMUIOVector *qiov,
2640
                                            int nb_sectors,
2641
                                            BlockDriverCompletionFunc *cb,
2642
                                            void *opaque,
2643
                                            int is_write)
2644

    
2645
{
2646
    BlockDriverAIOCBSync *acb;
2647

    
2648
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2649
    acb->is_write = is_write;
2650
    acb->qiov = qiov;
2651
    acb->bounce = qemu_blockalign(bs, qiov->size);
2652

    
2653
    if (!acb->bh)
2654
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2655

    
2656
    if (is_write) {
2657
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2658
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2659
    } else {
2660
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2661
    }
2662

    
2663
    qemu_bh_schedule(acb->bh);
2664

    
2665
    return &acb->common;
2666
}
2667

    
2668
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2669
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2670
        BlockDriverCompletionFunc *cb, void *opaque)
2671
{
2672
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2673
}
2674

    
2675
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2676
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2677
        BlockDriverCompletionFunc *cb, void *opaque)
2678
{
2679
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2680
}
2681

    
2682

    
2683
typedef struct BlockDriverAIOCBCoroutine {
2684
    BlockDriverAIOCB common;
2685
    BlockRequest req;
2686
    bool is_write;
2687
    QEMUBH* bh;
2688
} BlockDriverAIOCBCoroutine;
2689

    
2690
/*
 * Cancel hook of bdrv_em_co_aio_pool. @blockacb itself is not touched;
 * the hook only calls qemu_aio_flush().
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
2694

    
2695
static AIOPool bdrv_em_co_aio_pool = {
2696
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
2697
    .cancel             = bdrv_aio_co_cancel_em,
2698
};
2699

    
2700
static void bdrv_co_rw_bh(void *opaque)
2701
{
2702
    BlockDriverAIOCBCoroutine *acb = opaque;
2703

    
2704
    acb->common.cb(acb->common.opaque, acb->req.error);
2705
    qemu_bh_delete(acb->bh);
2706
    qemu_aio_release(acb);
2707
}
2708

    
2709
/*
 * Coroutine entry point for emulated AIO: run the request through the
 * driver's bdrv_co_readv or bdrv_co_writev callback, store the result in
 * req.error and schedule bdrv_co_rw_bh() to deliver the completion.
 */
static void coroutine_fn bdrv_co_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;
    BlockRequest *req = &acb->req;

    if (acb->is_write) {
        req->error = bs->drv->bdrv_co_writev(bs, req->sector,
            req->nb_sectors, req->qiov);
    } else {
        req->error = bs->drv->bdrv_co_readv(bs, req->sector,
            req->nb_sectors, req->qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
    qemu_bh_schedule(acb->bh);
}
2725

    
2726
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2727
                                               int64_t sector_num,
2728
                                               QEMUIOVector *qiov,
2729
                                               int nb_sectors,
2730
                                               BlockDriverCompletionFunc *cb,
2731
                                               void *opaque,
2732
                                               bool is_write)
2733
{
2734
    Coroutine *co;
2735
    BlockDriverAIOCBCoroutine *acb;
2736

    
2737
    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2738
    acb->req.sector = sector_num;
2739
    acb->req.nb_sectors = nb_sectors;
2740
    acb->req.qiov = qiov;
2741
    acb->is_write = is_write;
2742

    
2743
    co = qemu_coroutine_create(bdrv_co_rw);
2744
    qemu_coroutine_enter(co, acb);
2745

    
2746
    return &acb->common;
2747
}
2748

    
2749
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2750
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2751
        BlockDriverCompletionFunc *cb, void *opaque)
2752
{
2753
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2754
                                 false);
2755
}
2756

    
2757
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2758
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2759
        BlockDriverCompletionFunc *cb, void *opaque)
2760
{
2761
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2762
                                 true);
2763
}
2764

    
2765
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2766
        BlockDriverCompletionFunc *cb, void *opaque)
2767
{
2768
    BlockDriverAIOCBSync *acb;
2769

    
2770
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2771
    acb->is_write = 1; /* don't bounce in the completion hadler */
2772
    acb->qiov = NULL;
2773
    acb->bounce = NULL;
2774
    acb->ret = 0;
2775

    
2776
    if (!acb->bh)
2777
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2778

    
2779
    bdrv_flush(bs);
2780
    qemu_bh_schedule(acb->bh);
2781
    return &acb->common;
2782
}
2783

    
2784
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2785
        BlockDriverCompletionFunc *cb, void *opaque)
2786
{
2787
    BlockDriverAIOCBSync *acb;
2788

    
2789
    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2790
    acb->is_write = 1; /* don't bounce in the completion handler */
2791
    acb->qiov = NULL;
2792
    acb->bounce = NULL;
2793
    acb->ret = 0;
2794

    
2795
    if (!acb->bh) {
2796
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2797
    }
2798

    
2799
    qemu_bh_schedule(acb->bh);
2800
    return &acb->common;
2801
}
2802

    
2803
/**************************************************************/
2804
/* sync block device emulation */
2805

    
2806
/*
 * Completion callback for the synchronous emulation: stores @ret in the
 * int that @opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2810

    
2811
#define NOT_DONE 0x7fffffff
2812

    
2813
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2814
                        uint8_t *buf, int nb_sectors)
2815
{
2816
    int async_ret;
2817
    BlockDriverAIOCB *acb;
2818
    struct iovec iov;
2819
    QEMUIOVector qiov;
2820

    
2821
    async_ret = NOT_DONE;
2822
    iov.iov_base = (void *)buf;
2823
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2824
    qemu_iovec_init_external(&qiov, &iov, 1);
2825
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2826
        bdrv_rw_em_cb, &async_ret);
2827
    if (acb == NULL) {
2828
        async_ret = -1;
2829
        goto fail;
2830
    }
2831

    
2832
    while (async_ret == NOT_DONE) {
2833
        qemu_aio_wait();
2834
    }
2835

    
2836

    
2837
fail:
2838
    return async_ret;
2839
}
2840

    
2841
/*
 * Emulate a synchronous write with bdrv_aio_writev(): submit the request
 * and call qemu_aio_wait() until the completion callback has stored a
 * result.
 *
 * Returns the request's result, or -1 if it could not be submitted.
 */
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int status = NOT_DONE;
    struct iovec vec;
    QEMUIOVector qiov;

    vec.iov_base = (void *)buf;
    vec.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &vec, 1);

    if (bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
                        bdrv_rw_em_cb, &status) == NULL) {
        return -1;
    }

    while (status == NOT_DONE) {
        qemu_aio_wait();
    }

    return status;
}
2866

    
2867
void bdrv_init(void)
2868
{
2869
    module_call_init(MODULE_INIT_BLOCK);
2870
}
2871

    
2872
void bdrv_init_with_whitelist(void)
2873
{
2874
    use_bdrv_whitelist = 1;
2875
    bdrv_init();
2876
}
2877

    
2878
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2879
                   BlockDriverCompletionFunc *cb, void *opaque)
2880
{
2881
    BlockDriverAIOCB *acb;
2882

    
2883
    if (pool->free_aiocb) {
2884
        acb = pool->free_aiocb;
2885
        pool->free_aiocb = acb->next;
2886
    } else {
2887
        acb = g_malloc0(pool->aiocb_size);
2888
        acb->pool = pool;
2889
    }
2890
    acb->bs = bs;
2891
    acb->cb = cb;
2892
    acb->opaque = opaque;
2893
    return acb;
2894
}
2895

    
2896
void qemu_aio_release(void *p)
2897
{
2898
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2899
    AIOPool *pool = acb->pool;
2900
    acb->next = pool->free_aiocb;
2901
    pool->free_aiocb = acb;
2902
}
2903

    
2904
/**************************************************************/
2905
/* Coroutine block device emulation */
2906

    
2907
typedef struct CoroutineIOCompletion {
2908
    Coroutine *coroutine;
2909
    int ret;
2910
} CoroutineIOCompletion;
2911

    
2912
static void bdrv_co_io_em_complete(void *opaque, int ret)
2913
{
2914
    CoroutineIOCompletion *co = opaque;
2915

    
2916
    co->ret = ret;
2917
    qemu_coroutine_enter(co->coroutine, NULL);
2918
}
2919

    
2920
/*
 * Coroutine read/write emulated with the AIO interface.
 *
 * Submits the request with bdrv_aio_writev() (is_write) or bdrv_aio_readv(),
 * then yields; bdrv_co_io_em_complete() re-enters the coroutine once the
 * request has finished.  Uses qemu_coroutine_self()/qemu_coroutine_yield(),
 * so it is meant to be called from within a coroutine.
 *
 * Returns the result reported to the completion callback, or -EIO when no
 * request handle was returned by the AIO call.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion completion = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    acb = is_write
        ? bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                          bdrv_co_io_em_complete, &completion)
        : bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                         bdrv_co_io_em_complete, &completion);

    trace_bdrv_co_io(is_write, acb);
    if (acb == NULL) {
        return -EIO;
    }

    /* Suspended here until the completion callback re-enters us. */
    qemu_coroutine_yield();
    return completion.ret;
}
2945

    
2946
/* Coroutine read: forwards to bdrv_co_io_em() with is_write = false. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    const bool is_write = false;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2952

    
2953
/* Coroutine write: forwards to bdrv_co_io_em() with is_write = true. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    const bool is_write = true;

    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
}
2959

    
2960
/*
 * Coroutine flush emulated with bdrv_aio_flush().
 *
 * Yields after submitting the flush and resumes when
 * bdrv_co_io_em_complete() re-enters the coroutine.
 *
 * Returns the result reported to the completion callback, or -EIO when
 * bdrv_aio_flush() returned no request handle.
 */
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
{
    CoroutineIOCompletion completion = {
        .coroutine = qemu_coroutine_self(),
    };

    if (bdrv_aio_flush(bs, bdrv_co_io_em_complete, &completion) == NULL) {
        return -EIO;
    }

    qemu_coroutine_yield();
    return completion.ret;
}
2974

    
2975
/**************************************************************/
2976
/* removable device support */
2977

    
2978
/**
2979
 * Return TRUE if the media is present
2980
 */
2981
int bdrv_is_inserted(BlockDriverState *bs)
2982
{
2983
    BlockDriver *drv = bs->drv;
2984
    int ret;
2985
    if (!drv)
2986
        return 0;
2987
    if (!drv->bdrv_is_inserted)
2988
        return !bs->tray_open;
2989
    ret = drv->bdrv_is_inserted(bs);
2990
    return ret;
2991
}
2992

    
2993
/**
2994
 * Return TRUE if the media changed since the last call to this
2995
 * function. It is currently only used for floppy disks
2996
 */
2997
int bdrv_media_changed(BlockDriverState *bs)
2998
{
2999
    BlockDriver *drv = bs->drv;
3000
    int ret;
3001

    
3002
    if (!drv || !drv->bdrv_media_changed)
3003
        ret = -ENOTSUP;
3004
    else
3005
        ret = drv->bdrv_media_changed(bs);
3006
    if (ret == -ENOTSUP)
3007
        ret = bs->media_changed;
3008
    bs->media_changed = 0;
3009
    return ret;
3010
}
3011

    
3012
/**
3013
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3014
 */
3015
int bdrv_eject(BlockDriverState *bs, int eject_flag)
3016
{
3017
    BlockDriver *drv = bs->drv;
3018

    
3019
    if (eject_flag && bs->locked) {
3020
        return -EBUSY;
3021
    }
3022

    
3023
    if (drv && drv->bdrv_eject) {
3024
        drv->bdrv_eject(bs, eject_flag);
3025
    }
3026
    bs->tray_open = eject_flag;
3027
    return 0;
3028
}
3029

    
3030
/* Return the locked flag of bs, as last set by bdrv_set_locked(). */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}
3034

    
3035
/**
3036
 * Lock or unlock the media (if it is locked, the user won't be able
3037
 * to eject it manually).
3038
 */
3039
void bdrv_set_locked(BlockDriverState *bs, int locked)
3040
{
3041
    BlockDriver *drv = bs->drv;
3042

    
3043
    trace_bdrv_set_locked(bs, locked);
3044

    
3045
    bs->locked = locked;
3046
    if (drv && drv->bdrv_set_locked) {
3047
        drv->bdrv_set_locked(bs, locked);
3048
    }
3049
}
3050

    
3051
/* Needed for the generic SCSI interface */
3052

    
3053
/*
 * Forward an ioctl request to the driver of bs.
 *
 * Returns whatever the driver's bdrv_ioctl hook returns, or -ENOTSUP when
 * bs has no driver or the driver has no such hook.
 */
int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv == NULL || drv->bdrv_ioctl == NULL) {
        return -ENOTSUP;
    }
    return drv->bdrv_ioctl(bs, req, buf);
}
3061

    
3062
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3063
        unsigned long int req, void *buf,
3064
        BlockDriverCompletionFunc *cb, void *opaque)
3065
{
3066
    BlockDriver *drv = bs->drv;
3067

    
3068
    if (drv && drv->bdrv_aio_ioctl)
3069
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3070
    return NULL;
3071
}
3072

    
3073

    
3074

    
3075
/*
 * Allocate size bytes with qemu_memalign(), aligned for I/O on bs.
 *
 * The alignment is bs->buffer_alignment when bs is non-NULL and that field
 * is non-zero; otherwise it falls back to 512.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    size_t alignment = 512;

    if (bs != NULL && bs->buffer_alignment) {
        alignment = bs->buffer_alignment;
    }
    return qemu_memalign(alignment, size);
}
3079

    
3080
/*
 * Enable or disable dirty-chunk tracking on bs.
 *
 * Enabling allocates a zeroed bitmap with one bit per chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors, unless a bitmap already exists, in
 * which case it is kept.  Disabling frees the bitmap.  dirty_count is reset
 * to 0 in every case, including when an existing bitmap is kept.
 *
 * The bitmap is indexed elsewhere in this file as an array of unsigned long,
 * so it is sized in whole unsigned long words.  Sizing it in bytes would
 * leave the last word partly outside the allocation whenever the byte count
 * is not a multiple of sizeof(unsigned long).
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    /* Number of sectors described by one unsigned long of bitmap. */
    const int64_t sectors_per_word =
        (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK *
        (int64_t)(sizeof(unsigned long) * 8);
    int64_t length, nb_words;

    bs->dirty_count = 0;

    if (!enable) {
        /* g_free() accepts NULL, so no guard is needed. */
        g_free(bs->dirty_bitmap);
        bs->dirty_bitmap = NULL;
        return;
    }
    if (bs->dirty_bitmap) {
        return;
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        /* Treat a failed length query as an empty device. */
        length = 0;
    }

    /* Round up so a partially covered last word is still allocated. */
    nb_words = ((length >> BDRV_SECTOR_BITS) + sectors_per_word - 1) /
               sectors_per_word;
    bs->dirty_bitmap = g_malloc0(nb_words * sizeof(unsigned long));
}
3100

    
3101
/*
 * Report whether the chunk containing sector is marked dirty.
 *
 * Returns 1 if the chunk's bit is set in the dirty bitmap.  Returns 0 if it
 * is clear, if dirty tracking has no bitmap, or if the sector's byte offset
 * is not below bdrv_getlength(bs).
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
    unsigned long word, mask;

    if (!bs->dirty_bitmap) {
        return 0;
    }
    if ((sector << BDRV_SECTOR_BITS) >= bdrv_getlength(bs)) {
        return 0;
    }

    /* One bit per chunk, packed into unsigned long words. */
    word = bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)];
    mask = 1UL << (chunk % (sizeof(unsigned long) * 8));
    return (word & mask) != 0;
}
3113

    
3114
/*
 * Call set_dirty_bitmap() with a dirty value of 0 for the nr_sectors
 * sectors starting at cur_sector.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    const int dirty = 0;

    set_dirty_bitmap(bs, cur_sector, nr_sectors, dirty);
}
3119

    
3120
/* Return the dirty_count field of bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
3124

    
3125
/*
 * Set the in_use flag of bs.
 *
 * The new value must differ from the current one: setting the flag to the
 * value it already has trips the assertion.
 */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3130

    
3131
/* Return the in_use flag of bs, as last set by bdrv_set_in_use(). */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3135

    
3136
int bdrv_img_create(const char *filename, const char *fmt,
3137
                    const char *base_filename, const char *base_fmt,
3138
                    char *options, uint64_t img_size, int flags)
3139
{
3140
    QEMUOptionParameter *param = NULL, *create_options = NULL;
3141
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
3142
    BlockDriverState *bs = NULL;
3143
    BlockDriver *drv, *proto_drv;
3144
    BlockDriver *backing_drv = NULL;
3145
    int ret = 0;
3146

    
3147
    /* Find driver and parse its options */
3148
    drv = bdrv_find_format(fmt);
3149
    if (!drv) {
3150
        error_report("Unknown file format '%s'", fmt);
3151
        ret = -EINVAL;
3152
        goto out;
3153
    }
3154

    
3155
    proto_drv = bdrv_find_protocol(filename);
3156
    if (!proto_drv) {
3157
        error_report("Unknown protocol '%s'", filename);
3158
        ret = -EINVAL;
3159
        goto out;
3160
    }
3161

    
3162
    create_options = append_option_parameters(create_options,
3163
                                              drv->create_options);
3164
    create_options = append_option_parameters(create_options,
3165
                                              proto_drv->create_options);
3166

    
3167
    /* Create parameter list with default values */
3168
    param = parse_option_parameters("", create_options, param);
3169

    
3170
    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3171

    
3172
    /* Parse -o options */
3173
    if (options) {
3174
        param = parse_option_parameters(options, create_options, param);
3175
        if (param == NULL) {
3176
            error_report("Invalid options for file format '%s'.", fmt);
3177
            ret = -EINVAL;
3178
            goto out;
3179
        }
3180
    }
3181

    
3182
    if (base_filename) {
3183
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3184
                                 base_filename)) {
3185
            error_report("Backing file not supported for file format '%s'",
3186
                         fmt);
3187
            ret = -EINVAL;
3188
            goto out;
3189
        }
3190
    }
3191

    
3192
    if (base_fmt) {
3193
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3194
            error_report("Backing file format not supported for file "
3195
                         "format '%s'", fmt);
3196
            ret = -EINVAL;
3197
            goto out;
3198
        }
3199
    }
3200

    
3201
    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3202
    if (backing_file && backing_file->value.s) {
3203
        if (!strcmp(filename, backing_file->value.s)) {
3204
            error_report("Error: Trying to create an image with the "
3205
                         "same filename as the backing file");
3206
            ret = -EINVAL;
3207
            goto out;
3208
        }
3209
    }
3210

    
3211
    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3212
    if (backing_fmt && backing_fmt->value.s) {
3213
        backing_drv = bdrv_find_format(backing_fmt->value.s);
3214
        if (!backing_drv) {
3215
            error_report("Unknown backing file format '%s'",
3216
                         backing_fmt->value.s);
3217
            ret = -EINVAL;
3218
            goto out;
3219
        }
3220
    }
3221

    
3222
    // The size for the image must always be specified, with one exception:
3223
    // If we are using a backing file, we can obtain the size from there
3224
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
3225
    if (size && size->value.n == -1) {
3226
        if (backing_file && backing_file->value.s) {
3227
            uint64_t size;
3228
            char buf[32];
3229

    
3230
            bs = bdrv_new("");
3231

    
3232
            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3233
            if (ret < 0) {
3234
                error_report("Could not open '%s'", backing_file->value.s);
3235
                goto out;
3236
            }
3237
            bdrv_get_geometry(bs, &size);
3238
            size *= 512;
3239

    
3240
            snprintf(buf, sizeof(buf), "%" PRId64, size);
3241
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3242
        } else {
3243
            error_report("Image creation needs a size parameter");
3244
            ret = -EINVAL;
3245
            goto out;
3246
        }
3247
    }
3248

    
3249
    printf("Formatting '%s', fmt=%s ", filename, fmt);
3250
    print_option_parameters(param);
3251
    puts("");
3252

    
3253
    ret = bdrv_create(drv, filename, param);
3254

    
3255
    if (ret < 0) {
3256
        if (ret == -ENOTSUP) {
3257
            error_report("Formatting or formatting option not supported for "
3258
                         "file format '%s'", fmt);
3259
        } else if (ret == -EFBIG) {
3260
            error_report("The image size is too large for file format '%s'",
3261
                         fmt);
3262
        } else {
3263
            error_report("%s: error while creating %s: %s", filename, fmt,
3264
                         strerror(-ret));
3265
        }
3266
    }
3267

    
3268
out:
3269
    free_option_parameters(create_options);
3270
    free_option_parameters(param);
3271

    
3272
    if (bs) {
3273
        bdrv_delete(bs);
3274
    }
3275

    
3276
    return ret;
3277
}