Statistics
| Branch: | Revision:

root / block / raw-posix.c @ b94a2610

History | View | Annotate | Download (51.9 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu/timer.h"
26
#include "qemu/log.h"
27
#include "block/block_int.h"
28
#include "qemu/module.h"
29
#include "trace.h"
30
#include "block/thread-pool.h"
31
#include "qemu/iov.h"
32
#include "raw-aio.h"
33

    
34
#if defined(__APPLE__) && (__MACH__)
35
#include <paths.h>
36
#include <sys/param.h>
37
#include <IOKit/IOKitLib.h>
38
#include <IOKit/IOBSD.h>
39
#include <IOKit/storage/IOMediaBSDClient.h>
40
#include <IOKit/storage/IOMedia.h>
41
#include <IOKit/storage/IOCDMedia.h>
42
//#include <IOKit/storage/IOCDTypes.h>
43
#include <CoreFoundation/CoreFoundation.h>
44
#endif
45

    
46
#ifdef __sun__
47
#define _POSIX_PTHREAD_SEMANTICS 1
48
#include <sys/dkio.h>
49
#endif
50
#ifdef __linux__
51
#include <sys/types.h>
52
#include <sys/stat.h>
53
#include <sys/ioctl.h>
54
#include <sys/param.h>
55
#include <linux/cdrom.h>
56
#include <linux/fd.h>
57
#include <linux/fs.h>
58
#endif
59
#ifdef CONFIG_FIEMAP
60
#include <linux/fiemap.h>
61
#endif
62
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
63
#include <linux/falloc.h>
64
#endif
65
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
66
#include <sys/disk.h>
67
#include <sys/cdio.h>
68
#endif
69

    
70
#ifdef __OpenBSD__
71
#include <sys/ioctl.h>
72
#include <sys/disklabel.h>
73
#include <sys/dkio.h>
74
#endif
75

    
76
#ifdef __NetBSD__
77
#include <sys/ioctl.h>
78
#include <sys/disklabel.h>
79
#include <sys/dkio.h>
80
#include <sys/disk.h>
81
#endif
82

    
83
#ifdef __DragonFly__
84
#include <sys/ioctl.h>
85
#include <sys/diskslice.h>
86
#endif
87

    
88
#ifdef CONFIG_XFS
89
#include <xfs/xfs.h>
90
#endif
91

    
92
//#define DEBUG_FLOPPY
93

    
94
//#define DEBUG_BLOCK
95
#if defined(DEBUG_BLOCK)
96
#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
97
    { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
98
#else
99
#define DEBUG_BLOCK_PRINT(formatCstr, ...)
100
#endif
101

    
102
/* OS X does not have O_DSYNC */
103
#ifndef O_DSYNC
104
#ifdef O_SYNC
105
#define O_DSYNC O_SYNC
106
#elif defined(O_FSYNC)
107
#define O_DSYNC O_FSYNC
108
#endif
109
#endif
110

    
111
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
112
#ifndef O_DIRECT
113
#define O_DIRECT O_DSYNC
114
#endif
115

    
116
#define FTYPE_FILE   0
117
#define FTYPE_CD     1
118
#define FTYPE_FD     2
119

    
120
/* if the FD is not accessed during that time (in ns), we try to
121
   reopen it to see if the disk has been changed */
122
#define FD_OPEN_TIMEOUT (1000000000)
123

    
124
#define MAX_BLOCKSIZE        4096
125

    
126
/* Per-device state of the raw POSIX block driver (kept in bs->opaque). */
typedef struct BDRVRawState {
    int fd;                     /* host descriptor set by raw_open_common() */
    int type;                   /* one of the FTYPE_* constants */
    int open_flags;             /* flags the descriptor was opened with */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;                /* non-zero when requests go through laio */
    void *aio_ctx;              /* context returned by laio_init() */
#endif
#ifdef CONFIG_XFS
    bool is_xfs : 1;
#endif
    bool has_discard : 1;       /* cleared once discard proves unsupported */
} BDRVRawState;
146

    
147
/*
 * Scratch state built by raw_reopen_prepare() and either installed by
 * raw_reopen_commit() or discarded by raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;                     /* descriptor for the reopened file, or -1 */
    int open_flags;             /* open(2) flags belonging to that descriptor */
#ifdef CONFIG_LINUX_AIO
    int use_aio;
#endif
} BDRVRawReopenState;
154

    
155
static int fd_open(BlockDriverState *bs);
156
static int64_t raw_getlength(BlockDriverState *bs);
157

    
158
typedef struct RawPosixAIOData {
159
    BlockDriverState *bs;
160
    int aio_fildes;
161
    union {
162
        struct iovec *aio_iov;
163
        void *aio_ioctl_buf;
164
    };
165
    int aio_niov;
166
    uint64_t aio_nbytes;
167
#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
168
    off_t aio_offset;
169
    int aio_type;
170
} RawPosixAIOData;
171

    
172
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
173
static int cdrom_reopen(BlockDriverState *bs);
174
#endif
175

    
176
#if defined(__NetBSD__)
/*
 * Redirect a NetBSD block device path to its raw counterpart.
 *
 * When *filename names a block device, *filename is pointed at a static
 * buffer holding the same path with an 'r' inserted in front of the last
 * path component.  Any other kind of file leaves *filename untouched.
 *
 * Returns 0 on success or -errno if lstat() fails.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* fprintf() may overwrite errno, so capture it before reporting */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
            fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
            (int)(dp - fname), fname, dp + 1);
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/* No path rewriting is done on this host; *filename is left as-is. */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
213

    
214
static void raw_parse_flags(int bdrv_flags, int *open_flags)
215
{
216
    assert(open_flags != NULL);
217

    
218
    *open_flags |= O_BINARY;
219
    *open_flags &= ~O_ACCMODE;
220
    if (bdrv_flags & BDRV_O_RDWR) {
221
        *open_flags |= O_RDWR;
222
    } else {
223
        *open_flags |= O_RDONLY;
224
    }
225

    
226
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
227
     * and O_DIRECT for no caching. */
228
    if ((bdrv_flags & BDRV_O_NOCACHE)) {
229
        *open_flags |= O_DIRECT;
230
    }
231
}
232

    
233
#ifdef CONFIG_LINUX_AIO
234
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
235
{
236
    int ret = -1;
237
    assert(aio_ctx != NULL);
238
    assert(use_aio != NULL);
239
    /*
240
     * Currently Linux do AIO only for files opened with O_DIRECT
241
     * specified so check NOCACHE flag too
242
     */
243
    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
244
                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
245

    
246
        /* if non-NULL, laio_init() has already been run */
247
        if (*aio_ctx == NULL) {
248
            *aio_ctx = laio_init();
249
            if (!*aio_ctx) {
250
                goto error;
251
            }
252
        }
253
        *use_aio = 1;
254
    } else {
255
        *use_aio = 0;
256
    }
257

    
258
    ret = 0;
259

    
260
error:
261
    return ret;
262
}
263
#endif
264

    
265
static QemuOptsList raw_runtime_opts = {
266
    .name = "raw",
267
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
268
    .desc = {
269
        {
270
            .name = "filename",
271
            .type = QEMU_OPT_STRING,
272
            .help = "File name of the image",
273
        },
274
        { /* end of list */ }
275
    },
276
};
277

    
278
/*
 * Open path shared by the callers that set s->type beforehand.
 *
 * Reads the "filename" runtime option from @options, derives the open(2)
 * flags from @bdrv_flags (merged into @open_flags), opens the file and
 * stores the descriptor in the BDRVRawState behind bs->opaque.
 *
 * Returns 0 on success.  On failure a negative errno value is returned
 * and @errp is set.
 */
static int raw_open_common(BlockDriverState *bs, QDict *options,
                           int bdrv_flags, int open_flags, Error **errp)
{
    BDRVRawState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
    int fd, ret;

    opts = qemu_opts_create_nofail(&raw_runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    filename = qemu_opt_get(opts, "filename");

    ret = raw_normalize_devicepath(&filename);
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Could not normalize device path");
        goto fail;
    }

    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

    s->fd = -1;
    fd = qemu_open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        /* report a read-only filesystem as a plain permission problem */
        if (ret == -EROFS) {
            ret = -EACCES;
        }
        error_setg_errno(errp, -ret, "Could not open file");
        goto fail;
    }
    s->fd = fd;

#ifdef CONFIG_LINUX_AIO
    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
        /*
         * raw_set_aio() only reports -1, so errno is the sole hint at the
         * cause.  Read it before qemu_close() can overwrite it, and never
         * let a zero errno turn this failure into a "success" return.
         */
        ret = errno ? -errno : -EINVAL;
        qemu_close(fd);
        /* do not leave the closed descriptor behind in the driver state */
        s->fd = -1;
        error_setg_errno(errp, -ret, "Could not set AIO state");
        goto fail;
    }
#endif

    s->has_discard = 1;
#ifdef CONFIG_XFS
    if (platform_test_xfs_fd(s->fd)) {
        s->is_xfs = 1;
    }
#endif

    ret = 0;
fail:
    qemu_opts_del(opts);
    return ret;
}
339

    
340
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
341
                    Error **errp)
342
{
343
    BDRVRawState *s = bs->opaque;
344
    Error *local_err = NULL;
345
    int ret;
346

    
347
    s->type = FTYPE_FILE;
348
    ret = raw_open_common(bs, options, flags, 0, &local_err);
349
    if (error_is_set(&local_err)) {
350
        error_propagate(errp, local_err);
351
    }
352
    return ret;
353
}
354

    
355
static int raw_reopen_prepare(BDRVReopenState *state,
356
                              BlockReopenQueue *queue, Error **errp)
357
{
358
    BDRVRawState *s;
359
    BDRVRawReopenState *raw_s;
360
    int ret = 0;
361

    
362
    assert(state != NULL);
363
    assert(state->bs != NULL);
364

    
365
    s = state->bs->opaque;
366

    
367
    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
368
    raw_s = state->opaque;
369

    
370
#ifdef CONFIG_LINUX_AIO
371
    raw_s->use_aio = s->use_aio;
372

    
373
    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
374
     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
375
     * won't override aio_ctx if aio_ctx is non-NULL */
376
    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
377
        error_setg(errp, "Could not set AIO state");
378
        return -1;
379
    }
380
#endif
381

    
382
    if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
383
        raw_s->open_flags |= O_NONBLOCK;
384
    }
385

    
386
    raw_parse_flags(state->flags, &raw_s->open_flags);
387

    
388
    raw_s->fd = -1;
389

    
390
    int fcntl_flags = O_APPEND | O_NONBLOCK;
391
#ifdef O_NOATIME
392
    fcntl_flags |= O_NOATIME;
393
#endif
394

    
395
#ifdef O_ASYNC
396
    /* Not all operating systems have O_ASYNC, and those that don't
397
     * will not let us track the state into raw_s->open_flags (typically
398
     * you achieve the same effect with an ioctl, for example I_SETSIG
399
     * on Solaris). But we do not use O_ASYNC, so that's fine.
400
     */
401
    assert((s->open_flags & O_ASYNC) == 0);
402
#endif
403

    
404
    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
405
        /* dup the original fd */
406
        /* TODO: use qemu fcntl wrapper */
407
#ifdef F_DUPFD_CLOEXEC
408
        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
409
#else
410
        raw_s->fd = dup(s->fd);
411
        if (raw_s->fd != -1) {
412
            qemu_set_cloexec(raw_s->fd);
413
        }
414
#endif
415
        if (raw_s->fd >= 0) {
416
            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
417
            if (ret) {
418
                qemu_close(raw_s->fd);
419
                raw_s->fd = -1;
420
            }
421
        }
422
    }
423

    
424
    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
425
    if (raw_s->fd == -1) {
426
        assert(!(raw_s->open_flags & O_CREAT));
427
        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
428
        if (raw_s->fd == -1) {
429
            error_setg_errno(errp, errno, "Could not reopen file");
430
            ret = -1;
431
        }
432
    }
433
    return ret;
434
}
435

    
436

    
437
/*
 * Second phase of a successful reopen: close the old descriptor, install
 * the descriptor and flags gathered by raw_reopen_prepare(), and release
 * the temporary state.
 */
static void raw_reopen_commit(BDRVReopenState *state)
{
    BDRVRawState *s = state->bs->opaque;
    BDRVRawReopenState *prepared = state->opaque;

    qemu_close(s->fd);

    s->fd = prepared->fd;
    s->open_flags = prepared->open_flags;
#ifdef CONFIG_LINUX_AIO
    s->use_aio = prepared->use_aio;
#endif

    g_free(prepared);
    state->opaque = NULL;
}
453

    
454

    
455
/*
 * Undo raw_reopen_prepare(): close the descriptor it obtained, if any,
 * and free the temporary state.  Safe to call when prepare never got as
 * far as allocating that state.
 */
static void raw_reopen_abort(BDRVReopenState *state)
{
    BDRVRawReopenState *pending = state->opaque;

    if (!pending) {
        /* prepare did not allocate anything, so there is nothing to undo */
        return;
    }

    if (pending->fd >= 0) {
        qemu_close(pending->fd);
        pending->fd = -1;
    }

    g_free(pending);
    state->opaque = NULL;
}
471

    
472

    
473
/* XXX: use host sector size if necessary with:
474
#ifdef DIOCGSECTORSIZE
475
        {
476
            unsigned int sectorsize = 512;
477
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
478
                sectorsize > bufsize)
479
                bufsize = sectorsize;
480
        }
481
#endif
482
#ifdef CONFIG_COCOA
483
        uint32_t blockSize = 512;
484
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
485
            bufsize = blockSize;
486
        }
487
#endif
488
*/
489

    
490
/*
 * Worker for QEMU_AIO_IOCTL: issue the ioctl described by the request.
 * Returns 0 on success or -errno when ioctl() reports -1.
 */
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
    if (ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd,
              aiocb->aio_ioctl_buf) == -1) {
        return -errno;
    }
    return 0;
}
501

    
502
/*
 * Worker for QEMU_AIO_FLUSH: flush the request's descriptor with
 * qemu_fdatasync().  Returns 0 on success or -errno on failure.
 */
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
{
    if (qemu_fdatasync(aiocb->aio_fildes) == -1) {
        return -errno;
    }
    return 0;
}
512

    
513
/*
 * Thin wrappers around preadv()/pwritev().  When the build was configured
 * without CONFIG_PREADV both wrappers return -ENOSYS and preadv_present
 * starts out false, so callers use their linear fallback.
 */
#ifdef CONFIG_PREADV
static bool preadv_present = true;
#else
static bool preadv_present = false;
#endif

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return pwritev(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}
546

    
547
/*
 * Perform the request with a single qemu_preadv()/qemu_pwritev() call,
 * restarting it whenever it is interrupted (EINTR).
 *
 * Returns the wrapper's result, with a -1 result translated to -errno.
 */
static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
{
    ssize_t done;

    for (;;) {
        if (aiocb->aio_type & QEMU_AIO_WRITE) {
            done = qemu_pwritev(aiocb->aio_fildes, aiocb->aio_iov,
                                aiocb->aio_niov, aiocb->aio_offset);
        } else {
            done = qemu_preadv(aiocb->aio_fildes, aiocb->aio_iov,
                               aiocb->aio_niov, aiocb->aio_offset);
        }
        if (done != -1 || errno != EINTR) {
            break;
        }
    }

    return done == -1 ? -errno : done;
}
569

    
570
/*
571
 * Read/writes the data to/from a given linear buffer.
572
 *
573
 * Returns the number of bytes handles or -errno in case of an error. Short
574
 * reads are only returned if the end of the file is reached.
575
 */
576
static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
577
{
578
    ssize_t offset = 0;
579
    ssize_t len;
580

    
581
    while (offset < aiocb->aio_nbytes) {
582
        if (aiocb->aio_type & QEMU_AIO_WRITE) {
583
            len = pwrite(aiocb->aio_fildes,
584
                         (const char *)buf + offset,
585
                         aiocb->aio_nbytes - offset,
586
                         aiocb->aio_offset + offset);
587
        } else {
588
            len = pread(aiocb->aio_fildes,
589
                        buf + offset,
590
                        aiocb->aio_nbytes - offset,
591
                        aiocb->aio_offset + offset);
592
        }
593
        if (len == -1 && errno == EINTR) {
594
            continue;
595
        } else if (len == -1) {
596
            offset = -errno;
597
            break;
598
        } else if (len == 0) {
599
            break;
600
        }
601
        offset += len;
602
    }
603

    
604
    return offset;
605
}
606

    
607
/*
 * Execute a read or write request.
 *
 * Aligned requests with one iovec go straight to pread()/pwrite(); aligned
 * requests with several iovecs try preadv()/pwritev().  Everything else
 * (misaligned buffers, missing preadv support, a short vectored transfer)
 * is bounced through a single buffer from qemu_blockalign().
 *
 * Returns the number of bytes transferred or a negative errno value.
 */
static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
{
    ssize_t nbytes;
    char *buf;

    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1) {
             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
        }
        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            if (nbytes == aiocb->aio_nbytes ||
                (nbytes < 0 && nbytes != -ENOSYS)) {
                return nbytes;
            }
            /*
             * Only -ENOSYS means the host lacks preadv/pwritev.  A short
             * transfer must not disable the vectored path for every later
             * request; it is simply retried below for this request.
             */
            if (nbytes == -ENOSYS) {
                preadv_present = false;
            }
        }

        /*
         * XXX(hch): short read/write.  no easy way to handle the remainder
         * using these interfaces.  For now retry using plain
         * pread/pwrite?
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
    if (aiocb->aio_type & QEMU_AIO_WRITE) {
        char *p = buf;
        int i;

        /* gather the caller's segments into the bounce buffer */
        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        /* scatter the bounce buffer back, at most aio_nbytes in total */
        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len) {
                copy = aiocb->aio_iov[i].iov_len;
            }
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p     += copy;
            count -= copy;
        }
    }
    qemu_vfree(buf);

    return nbytes;
}
677

    
678
#ifdef CONFIG_XFS
679
/*
 * Release @bytes of space starting at @offset in the file behind s->fd
 * using the XFS_IOC_UNRESVSP64 control.
 *
 * Returns 0 on success or -errno if xfsctl() fails.
 */
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    fl.l_start = offset;
    fl.l_len = bytes;

    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
        /* the debug print may overwrite errno, so capture it first */
        int saved_errno = errno;

        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(saved_errno));
        return -saved_errno;
    }

    return 0;
}
695
#endif
696

    
697
/*
 * Worker for QEMU_AIO_DISCARD.
 *
 * Block devices use the BLKDISCARD ioctl; regular files use the XFS
 * control (when the file is on XFS) or fallocate() hole punching, each
 * only where the build provides it.  If the host reports that discard is
 * not supported, has_discard is cleared so later requests return
 * immediately, and the request itself is reported as successful.
 *
 * Returns 0 on success or a negative errno value.
 */
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
    BDRVRawState *s = aiocb->bs->opaque;
    int ret = -EOPNOTSUPP;
    int unsupported;

    if (!s->has_discard) {
        return 0;
    }

    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKDISCARD
        do {
            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
                return 0;
            }
        } while (errno == EINTR);

        ret = -errno;
#endif
    } else {
#ifdef CONFIG_XFS
        if (s->is_xfs) {
            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
        }
#endif

#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
        do {
            if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
                return 0;
            }
        } while (errno == EINTR);

        ret = -errno;
#endif
    }

    unsupported = (ret == -ENODEV || ret == -ENOSYS ||
                   ret == -EOPNOTSUPP || ret == -ENOTTY);
    if (unsupported) {
        /* remember the verdict and treat the discard as a no-op */
        s->has_discard = 0;
        return 0;
    }
    return ret;
}
743

    
744
static int aio_worker(void *arg)
745
{
746
    RawPosixAIOData *aiocb = arg;
747
    ssize_t ret = 0;
748

    
749
    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
750
    case QEMU_AIO_READ:
751
        ret = handle_aiocb_rw(aiocb);
752
        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
753
            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
754
                      0, aiocb->aio_nbytes - ret);
755

    
756
            ret = aiocb->aio_nbytes;
757
        }
758
        if (ret == aiocb->aio_nbytes) {
759
            ret = 0;
760
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
761
            ret = -EINVAL;
762
        }
763
        break;
764
    case QEMU_AIO_WRITE:
765
        ret = handle_aiocb_rw(aiocb);
766
        if (ret == aiocb->aio_nbytes) {
767
            ret = 0;
768
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
769
            ret = -EINVAL;
770
        }
771
        break;
772
    case QEMU_AIO_FLUSH:
773
        ret = handle_aiocb_flush(aiocb);
774
        break;
775
    case QEMU_AIO_IOCTL:
776
        ret = handle_aiocb_ioctl(aiocb);
777
        break;
778
    case QEMU_AIO_DISCARD:
779
        ret = handle_aiocb_discard(aiocb);
780
        break;
781
    default:
782
        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
783
        ret = -EINVAL;
784
        break;
785
    }
786

    
787
    g_slice_free(RawPosixAIOData, aiocb);
788
    return ret;
789
}
790

    
791
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
792
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
793
        BlockDriverCompletionFunc *cb, void *opaque, int type)
794
{
795
    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
796
    ThreadPool *pool;
797

    
798
    acb->bs = bs;
799
    acb->aio_type = type;
800
    acb->aio_fildes = fd;
801

    
802
    if (qiov) {
803
        acb->aio_iov = qiov->iov;
804
        acb->aio_niov = qiov->niov;
805
    }
806
    acb->aio_nbytes = nb_sectors * 512;
807
    acb->aio_offset = sector_num * 512;
808

    
809
    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
810
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
811
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
812
}
813

    
814
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
815
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
816
        BlockDriverCompletionFunc *cb, void *opaque, int type)
817
{
818
    BDRVRawState *s = bs->opaque;
819

    
820
    if (fd_open(bs) < 0)
821
        return NULL;
822

    
823
    /*
824
     * If O_DIRECT is used the buffer needs to be aligned on a sector
825
     * boundary.  Check if this is the case or tell the low-level
826
     * driver that it needs to copy the buffer.
827
     */
828
    if ((bs->open_flags & BDRV_O_NOCACHE)) {
829
        if (!bdrv_qiov_is_aligned(bs, qiov)) {
830
            type |= QEMU_AIO_MISALIGNED;
831
#ifdef CONFIG_LINUX_AIO
832
        } else if (s->use_aio) {
833
            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
834
                               nb_sectors, cb, opaque, type);
835
#endif
836
        }
837
    }
838

    
839
    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
840
                       cb, opaque, type);
841
}
842

    
843
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
844
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
845
        BlockDriverCompletionFunc *cb, void *opaque)
846
{
847
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
848
                          cb, opaque, QEMU_AIO_READ);
849
}
850

    
851
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
852
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
853
        BlockDriverCompletionFunc *cb, void *opaque)
854
{
855
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
856
                          cb, opaque, QEMU_AIO_WRITE);
857
}
858

    
859
static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
860
        BlockDriverCompletionFunc *cb, void *opaque)
861
{
862
    BDRVRawState *s = bs->opaque;
863

    
864
    if (fd_open(bs) < 0)
865
        return NULL;
866

    
867
    return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
868
}
869

    
870
/* Close the descriptor, if one is open, and mark it closed with -1. */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->fd < 0) {
        return;
    }

    qemu_close(s->fd);
    s->fd = -1;
}
878

    
879
/*
 * Resize the image to @offset bytes.
 *
 * Regular files are resized with ftruncate().  Character and block
 * devices cannot be resized, so the call only succeeds when @offset does
 * not exceed raw_getlength().  Other file types yield -ENOTSUP.
 *
 * Returns 0 on success or a negative errno value.
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *s = bs->opaque;
    struct stat st;

    if (fstat(s->fd, &st)) {
        return -errno;
    }

    if (S_ISREG(st.st_mode)) {
        return ftruncate(s->fd, offset) < 0 ? -errno : 0;
    }

    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        return offset > raw_getlength(bs) ? -EINVAL : 0;
    }

    return -ENOTSUP;
}
902

    
903
#ifdef __OpenBSD__
904
/*
 * Length of the image in bytes (OpenBSD).
 *
 * Devices report the size of the partition selected by the device number,
 * taken from the disklabel; anything else reports st_size.
 *
 * Returns the length, or -errno if fstat() or the ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    struct stat st;

    if (fstat(fd, &st)) {
        return -errno;
    }
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct disklabel dl;

        if (ioctl(fd, DIOCGDINFO, &dl)) {
            return -errno;
        }
        return (uint64_t)dl.d_secsize *
            dl.d_partitions[DISKPART(st.st_rdev)].p_size;
    } else {
        return st.st_size;
    }
}
922
#elif defined(__NetBSD__)
923
/*
 * Length of the image in bytes (NetBSD).
 *
 * Devices are sized from the wedge information when available, otherwise
 * from the disklabel partition selected by the device number; anything
 * else reports st_size.
 *
 * Returns the length, or -errno if fstat() or the disklabel ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    struct stat st;

    if (fstat(fd, &st)) {
        return -errno;
    }
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct dkwedge_info dkw;

        if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
            return dkw.dkw_size * 512;
        } else {
            struct disklabel dl;

            if (ioctl(fd, DIOCGDINFO, &dl)) {
                return -errno;
            }
            return (uint64_t)dl.d_secsize *
                dl.d_partitions[DISKPART(st.st_rdev)].p_size;
        }
    } else {
        return st.st_size;
    }
}
947
#elif defined(__sun__)
948
/*
 * Length of the image in bytes (Solaris).
 *
 * Tries the DKIOCGMEDIAINFO ioctl first and falls back to seeking to the
 * end of the file.
 *
 * Returns the length, the error from fd_open(), or -errno if lseek()
 * fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct dk_minfo minfo;
    int64_t size;
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

    /*
     * Use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
    if (ret != -1) {
        return minfo.dki_lbsize * minfo.dki_capacity;
    }

    /*
     * There are reports that lseek on some devices fails, but
     * irc discussion said that contingency on contingency was overkill.
     */
    size = lseek(s->fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}
973
#elif defined(CONFIG_BSD)
974
/*
 * Length of the image in bytes (other BSD hosts, including Mac OS X).
 *
 * Character devices are sized with DIOCGMEDIASIZE or DIOCGPART where the
 * host defines them, falling back to lseek() (or LONG_LONG_MAX on Apple).
 * On FreeBSD an empty CD drive triggers one cdrom_reopen() and a retry.
 * Everything else is sized with lseek().
 *
 * Returns the length, the error from fd_open(), or -errno when the final
 * size is negative.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
    struct stat sb;
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
    int reopened = 0;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
    /*
     * Read the descriptor on every pass instead of caching it up front.
     * NOTE(review): fd_open() and cdrom_reopen() are not visible here; if
     * either replaces s->fd, a copy taken earlier would be stale.
     */
    fd = s->fd;
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
                struct partinfo pi;
                if (ioctl(fd, DIOCGPART, &pi) == 0)
                        size = pi.media_size;
                else
                        size = 0;
        }
        if (size == 0)
#endif
        /* the statement below is the body of the dangling 'if' above */
#if defined(__APPLE__) && defined(__MACH__)
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
        case FTYPE_CD:
            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
            if (size == 2048LL * (unsigned)-1)
                size = 0;
            /* XXX no disc?  maybe we need to reopen... */
            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
                reopened = 1;
                goto again;
            }
        }
#endif
    } else {
        size = lseek(fd, 0, SEEK_END);
    }
    /* report failures as -errno rather than a bare -1 */
    if (size < 0) {
        return -errno;
    }
    return size;
}
1028
#else
1029
/*
 * Length of the image in bytes, found by seeking to the end of the file.
 *
 * Returns the length, the error from fd_open(), or -errno if lseek()
 * fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int64_t size;
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

    size = lseek(s->fd, 0, SEEK_END);
    if (size < 0) {
        /* lseek() only reports -1; hand the real cause to the caller */
        return -errno;
    }
    return size;
}
1041
#endif
1042

    
1043
/*
 * Space actually allocated to the image: st_blocks from fstat() times 512.
 * Returns -errno if fstat() fails.
 */
static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct stat st;

    if (fstat(s->fd, &st) >= 0) {
        return (int64_t)st.st_blocks * 512;
    }
    return -errno;
}
1053

    
1054
static int raw_create(const char *filename, QEMUOptionParameter *options,
1055
                      Error **errp)
1056
{
1057
    int fd;
1058
    int result = 0;
1059
    int64_t total_size = 0;
1060

    
1061
    /* Read out options */
1062
    while (options && options->name) {
1063
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1064
            total_size = options->value.n / BDRV_SECTOR_SIZE;
1065
        }
1066
        options++;
1067
    }
1068

    
1069
    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
1070
                   0644);
1071
    if (fd < 0) {
1072
        result = -errno;
1073
        error_setg_errno(errp, -result, "Could not create file");
1074
    } else {
1075
        if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1076
            result = -errno;
1077
            error_setg_errno(errp, -result, "Could not resize file");
1078
        }
1079
        if (qemu_close(fd) != 0) {
1080
            result = -errno;
1081
            error_setg_errno(errp, -result, "Could not close the new file");
1082
        }
1083
    }
1084
    return result;
1085
}
1086

    
1087
/*
1088
 * Returns true iff the specified sector is present in the disk image. Drivers
1089
 * not implementing the functionality are assumed to not support backing files,
1090
 * hence all their sectors are reported as allocated.
1091
 *
1092
 * If 'sector_num' is beyond the end of the disk image the return value is 0
1093
 * and 'pnum' is set to 0.
1094
 *
1095
 * 'pnum' is set to the number of sectors (including and immediately following
1096
 * the specified sector) that are known to be in the same
1097
 * allocated/unallocated state.
1098
 *
1099
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
1100
 * beyond the end of the disk image it will be clamped.
1101
 */
1102
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
1103
                                            int64_t sector_num,
1104
                                            int nb_sectors, int *pnum)
1105
{
1106
    off_t start, data, hole;
1107
    int64_t ret;
1108

    
1109
    ret = fd_open(bs);
1110
    if (ret < 0) {
1111
        return ret;
1112
    }
1113

    
1114
    start = sector_num * BDRV_SECTOR_SIZE;
1115
    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
1116

    
1117
#ifdef CONFIG_FIEMAP
1118

    
1119
    BDRVRawState *s = bs->opaque;
1120
    struct {
1121
        struct fiemap fm;
1122
        struct fiemap_extent fe;
1123
    } f;
1124

    
1125
    f.fm.fm_start = start;
1126
    f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
1127
    f.fm.fm_flags = 0;
1128
    f.fm.fm_extent_count = 1;
1129
    f.fm.fm_reserved = 0;
1130
    if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1131
        /* Assume everything is allocated.  */
1132
        *pnum = nb_sectors;
1133
        return ret;
1134
    }
1135

    
1136
    if (f.fm.fm_mapped_extents == 0) {
1137
        /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1138
         * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1139
         */
1140
        off_t length = lseek(s->fd, 0, SEEK_END);
1141
        hole = f.fm.fm_start;
1142
        data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1143
    } else {
1144
        data = f.fe.fe_logical;
1145
        hole = f.fe.fe_logical + f.fe.fe_length;
1146
        if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
1147
            ret |= BDRV_BLOCK_ZERO;
1148
        }
1149
    }
1150

    
1151
#elif defined SEEK_HOLE && defined SEEK_DATA
1152

    
1153
    BDRVRawState *s = bs->opaque;
1154

    
1155
    hole = lseek(s->fd, start, SEEK_HOLE);
1156
    if (hole == -1) {
1157
        /* -ENXIO indicates that sector_num was past the end of the file.
1158
         * There is a virtual hole there.  */
1159
        assert(errno != -ENXIO);
1160

    
1161
        /* Most likely EINVAL.  Assume everything is allocated.  */
1162
        *pnum = nb_sectors;
1163
        return ret;
1164
    }
1165

    
1166
    if (hole > start) {
1167
        data = start;
1168
    } else {
1169
        /* On a hole.  We need another syscall to find its end.  */
1170
        data = lseek(s->fd, start, SEEK_DATA);
1171
        if (data == -1) {
1172
            data = lseek(s->fd, 0, SEEK_END);
1173
        }
1174
    }
1175
#else
1176
    data = 0;
1177
    hole = start + nb_sectors * BDRV_SECTOR_SIZE;
1178
#endif
1179

    
1180
    if (data <= start) {
1181
        /* On a data extent, compute sectors to the end of the extent.  */
1182
        *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
1183
    } else {
1184
        /* On a hole, compute sectors to the beginning of the next extent.  */
1185
        *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1186
        ret &= ~BDRV_BLOCK_DATA;
1187
        ret |= BDRV_BLOCK_ZERO;
1188
    }
1189

    
1190
    return ret;
1191
}
1192

    
1193
/*
 * Submit an asynchronous discard for @nb_sectors sectors starting at
 * @sector_num by handing a QEMU_AIO_DISCARD request to paio_submit().
 * Returns whatever paio_submit() returns.
 */
static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors,
    BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVRawState *state = bs->opaque;
    int fd = state->fd;

    return paio_submit(bs, fd, sector_num, NULL, nb_sectors,
                       cb, opaque, QEMU_AIO_DISCARD);
}
1202

    
1203
static QEMUOptionParameter raw_create_options[] = {
1204
    {
1205
        .name = BLOCK_OPT_SIZE,
1206
        .type = OPT_SIZE,
1207
        .help = "Virtual disk size"
1208
    },
1209
    { NULL }
1210
};
1211

    
1212
static BlockDriver bdrv_file = {
1213
    .format_name = "file",
1214
    .protocol_name = "file",
1215
    .instance_size = sizeof(BDRVRawState),
1216
    .bdrv_needs_filename = true,
1217
    .bdrv_probe = NULL, /* no probe for protocols */
1218
    .bdrv_file_open = raw_open,
1219
    .bdrv_reopen_prepare = raw_reopen_prepare,
1220
    .bdrv_reopen_commit = raw_reopen_commit,
1221
    .bdrv_reopen_abort = raw_reopen_abort,
1222
    .bdrv_close = raw_close,
1223
    .bdrv_create = raw_create,
1224
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
1225
    .bdrv_co_get_block_status = raw_co_get_block_status,
1226

    
1227
    .bdrv_aio_readv = raw_aio_readv,
1228
    .bdrv_aio_writev = raw_aio_writev,
1229
    .bdrv_aio_flush = raw_aio_flush,
1230
    .bdrv_aio_discard = raw_aio_discard,
1231

    
1232
    .bdrv_truncate = raw_truncate,
1233
    .bdrv_getlength = raw_getlength,
1234
    .bdrv_get_allocated_file_size
1235
                        = raw_get_allocated_file_size,
1236

    
1237
    .create_options = raw_create_options,
1238
};
1239

    
1240
/***********************************************/
1241
/* host device */
1242

    
1243
#if defined(__APPLE__) && defined(__MACH__)
1244
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1245
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1246

    
1247
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1248
{
1249
    kern_return_t       kernResult;
1250
    mach_port_t     masterPort;
1251
    CFMutableDictionaryRef  classesToMatch;
1252

    
1253
    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1254
    if ( KERN_SUCCESS != kernResult ) {
1255
        printf( "IOMasterPort returned %d\n", kernResult );
1256
    }
1257

    
1258
    classesToMatch = IOServiceMatching( kIOCDMediaClass );
1259
    if ( classesToMatch == NULL ) {
1260
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
1261
    } else {
1262
    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1263
    }
1264
    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1265
    if ( KERN_SUCCESS != kernResult )
1266
    {
1267
        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1268
    }
1269

    
1270
    return kernResult;
1271
}
1272

    
1273
/*
 * Take the next entry from @mediaIterator and write the path of its raw
 * BSD device node (_PATH_DEV followed by "r" and the kIOBSDNameKey
 * property) into @bsdPath, which has room for @maxPathSize bytes.
 *
 * Returns KERN_SUCCESS when the full path was written.  On every failure
 * path KERN_FAILURE is returned and @bsdPath is left as an empty string,
 * so callers that test bsdPath[0] never see a partial "/dev/r" prefix.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    io_object_t     nextMedia;
    kern_return_t   kernResult = KERN_FAILURE;

    if ( maxPathSize <= 0 ) {
        /* No room even for the terminating NUL. */
        return kernResult;
    }
    *bsdPath = '\0';

    nextMedia = IOIteratorNext( mediaIterator );
    if ( nextMedia )
    {
        CFTypeRef   bsdPathAsCFString;

        bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
        if ( bsdPathAsCFString ) {
            /* Bounded write of the prefix; a result >= maxPathSize means
             * it was truncated. */
            int devPathLength = snprintf( bsdPath, (size_t)maxPathSize, "%sr", _PATH_DEV );

            if ( devPathLength > 0 && devPathLength < maxPathSize &&
                 CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
                kernResult = KERN_SUCCESS;
            } else {
                /* Do not leave a half-built path behind. */
                *bsdPath = '\0';
            }
            CFRelease( bsdPathAsCFString );
        }
        IOObjectRelease( nextMedia );
    }

    return kernResult;
}
1298

    
1299
#endif
1300

    
1301
/*
 * Probe priority of the host_device driver for @filename:
 * 50 for names starting with "/dev/cdrom", 100 for an existing character
 * or block device, 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }
    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
1316

    
1317
/*
 * Check whether the opened host device may be written to.
 *
 * Returns 0 when writing is allowed (or when BLKROGET is not available at
 * build time), -EACCES when the block device reports itself read-only,
 * and -errno when fstat() or the BLKROGET ioctl fails.
 */
static int check_hdev_writable(BDRVRawState *s)
{
#if defined(BLKROGET)
    /* Linux block devices can be configured "read-only" using blockdev(8).
     * This is independent of device node permissions and therefore open(2)
     * with O_RDWR succeeds.  Actual writes fail with EPERM.
     *
     * bdrv_open() is supposed to fail if the disk is read-only.  Explicitly
     * check for read-only block devices so that Linux block devices behave
     * properly.
     */
    struct stat info;
    int is_readonly = 0;

    if (fstat(s->fd, &info) != 0) {
        return -errno;
    }

    /* Only block devices are queried for the read-only flag. */
    if (S_ISBLK(info.st_mode)) {
        if (ioctl(s->fd, BLKROGET, &is_readonly) < 0) {
            return -errno;
        }
        if (is_readonly) {
            return -EACCES;
        }
    }
#endif /* defined(BLKROGET) */
    return 0;
}
1349

    
1350
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
1351
                     Error **errp)
1352
{
1353
    BDRVRawState *s = bs->opaque;
1354
    Error *local_err = NULL;
1355
    int ret;
1356
    const char *filename = qdict_get_str(options, "filename");
1357

    
1358
#if defined(__APPLE__) && defined(__MACH__)
1359
    if (strstart(filename, "/dev/cdrom", NULL)) {
1360
        kern_return_t kernResult;
1361
        io_iterator_t mediaIterator;
1362
        char bsdPath[ MAXPATHLEN ];
1363
        int fd;
1364

    
1365
        kernResult = FindEjectableCDMedia( &mediaIterator );
1366
        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
1367

    
1368
        if ( bsdPath[ 0 ] != '\0' ) {
1369
            strcat(bsdPath,"s0");
1370
            /* some CDs don't have a partition 0 */
1371
            fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
1372
            if (fd < 0) {
1373
                bsdPath[strlen(bsdPath)-1] = '1';
1374
            } else {
1375
                qemu_close(fd);
1376
            }
1377
            filename = bsdPath;
1378
            qdict_put(options, "filename", qstring_from_str(filename));
1379
        }
1380

    
1381
        if ( mediaIterator )
1382
            IOObjectRelease( mediaIterator );
1383
    }
1384
#endif
1385

    
1386
    s->type = FTYPE_FILE;
1387
#if defined(__linux__)
1388
    {
1389
        char resolved_path[ MAXPATHLEN ], *temp;
1390

    
1391
        temp = realpath(filename, resolved_path);
1392
        if (temp && strstart(temp, "/dev/sg", NULL)) {
1393
            bs->sg = 1;
1394
        }
1395
    }
1396
#endif
1397

    
1398
    ret = raw_open_common(bs, options, flags, 0, &local_err);
1399
    if (ret < 0) {
1400
        if (error_is_set(&local_err)) {
1401
            error_propagate(errp, local_err);
1402
        }
1403
        return ret;
1404
    }
1405

    
1406
    if (flags & BDRV_O_RDWR) {
1407
        ret = check_hdev_writable(s);
1408
        if (ret < 0) {
1409
            raw_close(bs);
1410
            error_setg_errno(errp, -ret, "The device is not writable");
1411
            return ret;
1412
        }
1413
    }
1414

    
1415
    return ret;
1416
}
1417

    
1418
#if defined(__linux__)
1419
/* Note: we do not have a reliable method to detect if the floppy is
1420
   present. The current method is to try to open the floppy at every
1421
   I/O and to keep it opened during a few hundreds of ms. */
1422
static int fd_open(BlockDriverState *bs)
1423
{
1424
    BDRVRawState *s = bs->opaque;
1425
    int last_media_present;
1426

    
1427
    if (s->type != FTYPE_FD)
1428
        return 0;
1429
    last_media_present = (s->fd >= 0);
1430
    if (s->fd >= 0 &&
1431
        (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1432
        qemu_close(s->fd);
1433
        s->fd = -1;
1434
#ifdef DEBUG_FLOPPY
1435
        printf("Floppy closed\n");
1436
#endif
1437
    }
1438
    if (s->fd < 0) {
1439
        if (s->fd_got_error &&
1440
            (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1441
#ifdef DEBUG_FLOPPY
1442
            printf("No floppy (open delayed)\n");
1443
#endif
1444
            return -EIO;
1445
        }
1446
        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
1447
        if (s->fd < 0) {
1448
            s->fd_error_time = get_clock();
1449
            s->fd_got_error = 1;
1450
            if (last_media_present)
1451
                s->fd_media_changed = 1;
1452
#ifdef DEBUG_FLOPPY
1453
            printf("No floppy\n");
1454
#endif
1455
            return -EIO;
1456
        }
1457
#ifdef DEBUG_FLOPPY
1458
        printf("Floppy opened\n");
1459
#endif
1460
    }
1461
    if (!last_media_present)
1462
        s->fd_media_changed = 1;
1463
    s->fd_open_time = get_clock();
1464
    s->fd_got_error = 0;
1465
    return 0;
1466
}
1467

    
1468
/* Forward request @req with argument @buf to ioctl() on the device fd. */
static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *state = bs->opaque;
    int fd = state->fd;

    return ioctl(fd, req, buf);
}
1474

    
1475
static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
1476
        unsigned long int req, void *buf,
1477
        BlockDriverCompletionFunc *cb, void *opaque)
1478
{
1479
    BDRVRawState *s = bs->opaque;
1480
    RawPosixAIOData *acb;
1481
    ThreadPool *pool;
1482

    
1483
    if (fd_open(bs) < 0)
1484
        return NULL;
1485

    
1486
    acb = g_slice_new(RawPosixAIOData);
1487
    acb->bs = bs;
1488
    acb->aio_type = QEMU_AIO_IOCTL;
1489
    acb->aio_fildes = s->fd;
1490
    acb->aio_offset = 0;
1491
    acb->aio_ioctl_buf = buf;
1492
    acb->aio_ioctl_cmd = req;
1493
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1494
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
1495
}
1496

    
1497
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
1498
/*
 * FreeBSD variant: nothing is (re)opened here.  It only checks that s->fd
 * is valid (it is called by the I/O operations) and returns 0 if so,
 * -EIO otherwise.
 */
static int fd_open(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;

    return (state->fd >= 0) ? 0 : -EIO;
}
1507
#else /* !linux && !FreeBSD */
1508

    
1509
/*
 * Variant for hosts that are neither Linux nor FreeBSD: there is nothing
 * to do, so it always reports success.
 */
static int fd_open(BlockDriverState *bs)
{
    (void)bs;
    return 0;
}
1513

    
1514
#endif /* !linux && !FreeBSD */
1515

    
1516
/*
 * Submit an asynchronous discard on a host device.  The request is
 * flagged QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV.  Returns NULL if fd_open()
 * fails, otherwise whatever paio_submit() returns.
 */
static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors,
    BlockDriverCompletionFunc *cb, void *opaque)
{
    BDRVRawState *state = bs->opaque;
    int opened = fd_open(bs);

    if (opened < 0) {
        return NULL;
    }
    return paio_submit(bs, state->fd, sector_num, NULL, nb_sectors,
                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
1528

    
1529
static int hdev_create(const char *filename, QEMUOptionParameter *options,
1530
                       Error **errp)
1531
{
1532
    int fd;
1533
    int ret = 0;
1534
    struct stat stat_buf;
1535
    int64_t total_size = 0;
1536

    
1537
    /* Read out options */
1538
    while (options && options->name) {
1539
        if (!strcmp(options->name, "size")) {
1540
            total_size = options->value.n / BDRV_SECTOR_SIZE;
1541
        }
1542
        options++;
1543
    }
1544

    
1545
    fd = qemu_open(filename, O_WRONLY | O_BINARY);
1546
    if (fd < 0) {
1547
        ret = -errno;
1548
        error_setg_errno(errp, -ret, "Could not open device");
1549
        return ret;
1550
    }
1551

    
1552
    if (fstat(fd, &stat_buf) < 0) {
1553
        ret = -errno;
1554
        error_setg_errno(errp, -ret, "Could not stat device");
1555
    } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
1556
        error_setg(errp,
1557
                   "The given file is neither a block nor a character device");
1558
        ret = -ENODEV;
1559
    } else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
1560
        error_setg(errp, "Device is too small");
1561
        ret = -ENOSPC;
1562
    }
1563

    
1564
    qemu_close(fd);
1565
    return ret;
1566
}
1567

    
1568
static BlockDriver bdrv_host_device = {
1569
    .format_name        = "host_device",
1570
    .protocol_name        = "host_device",
1571
    .instance_size      = sizeof(BDRVRawState),
1572
    .bdrv_needs_filename = true,
1573
    .bdrv_probe_device  = hdev_probe_device,
1574
    .bdrv_file_open     = hdev_open,
1575
    .bdrv_close         = raw_close,
1576
    .bdrv_reopen_prepare = raw_reopen_prepare,
1577
    .bdrv_reopen_commit  = raw_reopen_commit,
1578
    .bdrv_reopen_abort   = raw_reopen_abort,
1579
    .bdrv_create        = hdev_create,
1580
    .create_options     = raw_create_options,
1581

    
1582
    .bdrv_aio_readv        = raw_aio_readv,
1583
    .bdrv_aio_writev        = raw_aio_writev,
1584
    .bdrv_aio_flush        = raw_aio_flush,
1585
    .bdrv_aio_discard   = hdev_aio_discard,
1586

    
1587
    .bdrv_truncate      = raw_truncate,
1588
    .bdrv_getlength        = raw_getlength,
1589
    .bdrv_get_allocated_file_size
1590
                        = raw_get_allocated_file_size,
1591

    
1592
    /* generic scsi device */
1593
#ifdef __linux__
1594
    .bdrv_ioctl         = hdev_ioctl,
1595
    .bdrv_aio_ioctl     = hdev_aio_ioctl,
1596
#endif
1597
};
1598

    
1599
#ifdef __linux__
1600
static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
1601
                       Error **errp)
1602
{
1603
    BDRVRawState *s = bs->opaque;
1604
    Error *local_err = NULL;
1605
    int ret;
1606

    
1607
    s->type = FTYPE_FD;
1608

    
1609
    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1610
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
1611
    if (ret) {
1612
        if (error_is_set(&local_err)) {
1613
            error_propagate(errp, local_err);
1614
        }
1615
        return ret;
1616
    }
1617

    
1618
    /* close fd so that we can reopen it as needed */
1619
    qemu_close(s->fd);
1620
    s->fd = -1;
1621
    s->fd_media_changed = 1;
1622

    
1623
    return 0;
1624
}
1625

    
1626
static int floppy_probe_device(const char *filename)
1627
{
1628
    int fd, ret;
1629
    int prio = 0;
1630
    struct floppy_struct fdparam;
1631
    struct stat st;
1632

    
1633
    if (strstart(filename, "/dev/fd", NULL) &&
1634
        !strstart(filename, "/dev/fdset/", NULL)) {
1635
        prio = 50;
1636
    }
1637

    
1638
    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
1639
    if (fd < 0) {
1640
        goto out;
1641
    }
1642
    ret = fstat(fd, &st);
1643
    if (ret == -1 || !S_ISBLK(st.st_mode)) {
1644
        goto outc;
1645
    }
1646

    
1647
    /* Attempt to detect via a floppy specific ioctl */
1648
    ret = ioctl(fd, FDGETPRM, &fdparam);
1649
    if (ret >= 0)
1650
        prio = 100;
1651

    
1652
outc:
1653
    qemu_close(fd);
1654
out:
1655
    return prio;
1656
}
1657

    
1658

    
1659
/* A floppy counts as inserted when fd_open() succeeds; returns 1 or 0. */
static int floppy_is_inserted(BlockDriverState *bs)
{
    int status = fd_open(bs);

    return status >= 0;
}
1663

    
1664
/*
 * Return the media-changed flag and clear it.  fd_open() is called first
 * so the flag gets a chance to be updated; its result is ignored.
 */
static int floppy_media_changed(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;
    int changed;

    /*
     * XXX: we do not have a true media changed indication.
     * It does not work if the floppy is changed without trying to read it.
     */
    fd_open(bs);
    changed = state->fd_media_changed;
    state->fd_media_changed = 0;
#ifdef DEBUG_FLOPPY
    printf("Floppy changed=%d\n", changed);
#endif
    return changed;
}
1681

    
1682
/*
 * Eject the floppy.  Any cached descriptor is closed first, then the
 * device is reopened with O_NONBLOCK just to issue FDEJECT.  Failures are
 * not returned; a failing ioctl is reported through perror().
 * eject_flag is not consulted.
 */
static void floppy_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *state = bs->opaque;
    int tmp_fd;

    if (state->fd >= 0) {
        qemu_close(state->fd);
        state->fd = -1;
    }

    tmp_fd = qemu_open(bs->filename, state->open_flags | O_NONBLOCK);
    if (tmp_fd < 0) {
        return;
    }

    if (ioctl(tmp_fd, FDEJECT, 0) < 0) {
        perror("FDEJECT");
    }
    qemu_close(tmp_fd);
}
1698

    
1699
static BlockDriver bdrv_host_floppy = {
1700
    .format_name        = "host_floppy",
1701
    .protocol_name      = "host_floppy",
1702
    .instance_size      = sizeof(BDRVRawState),
1703
    .bdrv_needs_filename = true,
1704
    .bdrv_probe_device        = floppy_probe_device,
1705
    .bdrv_file_open     = floppy_open,
1706
    .bdrv_close         = raw_close,
1707
    .bdrv_reopen_prepare = raw_reopen_prepare,
1708
    .bdrv_reopen_commit  = raw_reopen_commit,
1709
    .bdrv_reopen_abort   = raw_reopen_abort,
1710
    .bdrv_create        = hdev_create,
1711
    .create_options     = raw_create_options,
1712

    
1713
    .bdrv_aio_readv     = raw_aio_readv,
1714
    .bdrv_aio_writev    = raw_aio_writev,
1715
    .bdrv_aio_flush        = raw_aio_flush,
1716

    
1717
    .bdrv_truncate      = raw_truncate,
1718
    .bdrv_getlength      = raw_getlength,
1719
    .has_variable_length = true,
1720
    .bdrv_get_allocated_file_size
1721
                        = raw_get_allocated_file_size,
1722

    
1723
    /* removable device support */
1724
    .bdrv_is_inserted   = floppy_is_inserted,
1725
    .bdrv_media_changed = floppy_media_changed,
1726
    .bdrv_eject         = floppy_eject,
1727
};
1728

    
1729
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
1730
                      Error **errp)
1731
{
1732
    BDRVRawState *s = bs->opaque;
1733
    Error *local_err = NULL;
1734
    int ret;
1735

    
1736
    s->type = FTYPE_CD;
1737

    
1738
    /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1739
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
1740
    if (error_is_set(&local_err)) {
1741
        error_propagate(errp, local_err);
1742
    }
1743
    return ret;
1744
}
1745

    
1746
static int cdrom_probe_device(const char *filename)
1747
{
1748
    int fd, ret;
1749
    int prio = 0;
1750
    struct stat st;
1751

    
1752
    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
1753
    if (fd < 0) {
1754
        goto out;
1755
    }
1756
    ret = fstat(fd, &st);
1757
    if (ret == -1 || !S_ISBLK(st.st_mode)) {
1758
        goto outc;
1759
    }
1760

    
1761
    /* Attempt to detect via a CDROM specific ioctl */
1762
    ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1763
    if (ret >= 0)
1764
        prio = 100;
1765

    
1766
outc:
1767
    qemu_close(fd);
1768
out:
1769
    return prio;
1770
}
1771

    
1772
/*
 * Report whether a disc is present (Linux): returns 1 when the
 * CDROM_DRIVE_STATUS ioctl answers CDS_DISC_OK, 0 otherwise.
 */
static int cdrom_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;
    int status = ioctl(state->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

    return status == CDS_DISC_OK;
}
1782

    
1783
/*
 * Open the tray (eject_flag true, CDROMEJECT) or close it (eject_flag
 * false, CDROMCLOSETRAY) on Linux.  Failures are not returned; they are
 * reported through perror() under the name of the ioctl that failed.
 */
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *s = bs->opaque;

    if (eject_flag) {
        if (ioctl(s->fd, CDROMEJECT, NULL) < 0) {
            perror("CDROMEJECT");
        }
    } else {
        if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) {
            /* Was mislabelled "CDROMEJECT", hiding which request failed. */
            perror("CDROMCLOSETRAY");
        }
    }
}
1795

    
1796
/*
 * Lock or unlock the drive door (Linux) via the CDROM_LOCKDOOR ioctl.
 * The result is deliberately ignored.
 */
static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
{
    BDRVRawState *state = bs->opaque;

    /*
     * Note: an error can happen if the distribution automatically
     * mounts the CD-ROM, so a failure is not reported
     * (no perror("CDROM_LOCKDOOR")).
     */
    (void) ioctl(state->fd, CDROM_LOCKDOOR, locked);
}
1808

    
1809
static BlockDriver bdrv_host_cdrom = {
1810
    .format_name        = "host_cdrom",
1811
    .protocol_name      = "host_cdrom",
1812
    .instance_size      = sizeof(BDRVRawState),
1813
    .bdrv_needs_filename = true,
1814
    .bdrv_probe_device        = cdrom_probe_device,
1815
    .bdrv_file_open     = cdrom_open,
1816
    .bdrv_close         = raw_close,
1817
    .bdrv_reopen_prepare = raw_reopen_prepare,
1818
    .bdrv_reopen_commit  = raw_reopen_commit,
1819
    .bdrv_reopen_abort   = raw_reopen_abort,
1820
    .bdrv_create        = hdev_create,
1821
    .create_options     = raw_create_options,
1822

    
1823
    .bdrv_aio_readv     = raw_aio_readv,
1824
    .bdrv_aio_writev    = raw_aio_writev,
1825
    .bdrv_aio_flush        = raw_aio_flush,
1826

    
1827
    .bdrv_truncate      = raw_truncate,
1828
    .bdrv_getlength      = raw_getlength,
1829
    .has_variable_length = true,
1830
    .bdrv_get_allocated_file_size
1831
                        = raw_get_allocated_file_size,
1832

    
1833
    /* removable device support */
1834
    .bdrv_is_inserted   = cdrom_is_inserted,
1835
    .bdrv_eject         = cdrom_eject,
1836
    .bdrv_lock_medium   = cdrom_lock_medium,
1837

    
1838
    /* generic scsi device */
1839
    .bdrv_ioctl         = hdev_ioctl,
1840
    .bdrv_aio_ioctl     = hdev_aio_ioctl,
1841
};
1842
#endif /* __linux__ */
1843

    
1844
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1845
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
1846
{
1847
    BDRVRawState *s = bs->opaque;
1848
    Error *local_err = NULL;
1849
    int ret;
1850

    
1851
    s->type = FTYPE_CD;
1852

    
1853
    ret = raw_open_common(bs, options, flags, 0, &local_err);
1854
    if (ret) {
1855
        if (error_is_set(&local_err)) {
1856
            error_propagate(errp, local_err);
1857
        }
1858
        return ret;
1859
    }
1860

    
1861
    /* make sure the door isn't locked at this time */
1862
    ioctl(s->fd, CDIOCALLOW);
1863
    return 0;
1864
}
1865

    
1866
/*
 * Probe priority of the host_cdrom driver (FreeBSD), decided by name
 * only: 100 for paths starting with "/dev/cd" or "/dev/acd", else 0.
 */
static int cdrom_probe_device(const char *filename)
{
    if (strstart(filename, "/dev/cd", NULL)) {
        return 100;
    }
    if (strstart(filename, "/dev/acd", NULL)) {
        return 100;
    }
    return 0;
}
1873

    
1874
/*
 * Close and reopen the CD-ROM device (FreeBSD).
 *
 * Returns 0 on success with s->fd replaced by the new descriptor, or -EIO
 * with s->fd set to -1 if the device cannot be reopened.
 */
static int cdrom_reopen(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;
    int new_fd;

    /*
     * Force reread of possibly changed/newly loaded disc,
     * FreeBSD seems to not notice sometimes...
     */
    if (state->fd >= 0) {
        qemu_close(state->fd);
    }

    new_fd = qemu_open(bs->filename, state->open_flags, 0644);
    if (new_fd < 0) {
        state->fd = -1;
        return -EIO;
    }
    state->fd = new_fd;

    /* make sure the door isn't locked at this time */
    ioctl(state->fd, CDIOCALLOW);
    return 0;
}
1896

    
1897
/*
 * Report whether a disc is present (FreeBSD): returns 1 when
 * raw_getlength() reports a positive length, 0 otherwise.
 */
static int cdrom_is_inserted(BlockDriverState *bs)
{
    int64_t length = raw_getlength(bs);

    return length > 0;
}
1901

    
1902
/*
 * Open the tray (eject_flag true, CDIOCEJECT) or close it (eject_flag
 * false, CDIOCCLOSE) on FreeBSD.  Does nothing without a valid
 * descriptor.  The door is unlocked first with CDIOCALLOW, a failing
 * request is reported through perror(), and the device is reopened
 * afterwards via cdrom_reopen().
 */
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *state = bs->opaque;

    if (state->fd < 0) {
        return;
    }

    (void) ioctl(state->fd, CDIOCALLOW);

    if (!eject_flag) {
        if (ioctl(state->fd, CDIOCCLOSE) < 0) {
            perror("CDIOCCLOSE");
        }
    } else {
        if (ioctl(state->fd, CDIOCEJECT) < 0) {
            perror("CDIOCEJECT");
        }
    }

    cdrom_reopen(bs);
}
1921

    
1922
/*
 * Lock (CDIOCPREVENT) or unlock (CDIOCALLOW) the drive door on FreeBSD.
 * Does nothing without a valid descriptor; the ioctl result is
 * deliberately ignored.
 */
static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
{
    BDRVRawState *state = bs->opaque;

    if (state->fd < 0) {
        return;
    }

    /*
     * Note: an error can happen if the distribution automatically
     * mounts the CD-ROM, so a failure is not reported.
     */
    (void) ioctl(state->fd, (locked ? CDIOCPREVENT : CDIOCALLOW));
}
1936

    
1937
static BlockDriver bdrv_host_cdrom = {
1938
    .format_name        = "host_cdrom",
1939
    .protocol_name      = "host_cdrom",
1940
    .instance_size      = sizeof(BDRVRawState),
1941
    .bdrv_needs_filename = true,
1942
    .bdrv_probe_device        = cdrom_probe_device,
1943
    .bdrv_file_open     = cdrom_open,
1944
    .bdrv_close         = raw_close,
1945
    .bdrv_reopen_prepare = raw_reopen_prepare,
1946
    .bdrv_reopen_commit  = raw_reopen_commit,
1947
    .bdrv_reopen_abort   = raw_reopen_abort,
1948
    .bdrv_create        = hdev_create,
1949
    .create_options     = raw_create_options,
1950

    
1951
    .bdrv_aio_readv     = raw_aio_readv,
1952
    .bdrv_aio_writev    = raw_aio_writev,
1953
    .bdrv_aio_flush        = raw_aio_flush,
1954

    
1955
    .bdrv_truncate      = raw_truncate,
1956
    .bdrv_getlength      = raw_getlength,
1957
    .has_variable_length = true,
1958
    .bdrv_get_allocated_file_size
1959
                        = raw_get_allocated_file_size,
1960

    
1961
    /* removable device support */
1962
    .bdrv_is_inserted   = cdrom_is_inserted,
1963
    .bdrv_eject         = cdrom_eject,
1964
    .bdrv_lock_medium   = cdrom_lock_medium,
1965
};
1966
#endif /* __FreeBSD__ */
1967

    
1968
#ifdef CONFIG_LINUX_AIO
1969
/**
1970
 * Return the file descriptor for Linux AIO
1971
 *
1972
 * This function is a layering violation and should be removed when it becomes
1973
 * possible to call the block layer outside the global mutex.  It allows the
1974
 * caller to hijack the file descriptor so I/O can be performed outside the
1975
 * block layer.
1976
 */
1977
int raw_get_aio_fd(BlockDriverState *bs)
1978
{
1979
    BDRVRawState *s;
1980

    
1981
    if (!bs->drv) {
1982
        return -ENOMEDIUM;
1983
    }
1984

    
1985
    if (bs->drv == bdrv_find_format("raw")) {
1986
        bs = bs->file;
1987
    }
1988

    
1989
    /* raw-posix has several protocols so just check for raw_aio_readv */
1990
    if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
1991
        return -ENOTSUP;
1992
    }
1993

    
1994
    s = bs->opaque;
1995
    if (!s->use_aio) {
1996
        return -ENOTSUP;
1997
    }
1998
    return s->fd;
1999
}
2000
#endif /* CONFIG_LINUX_AIO */
2001

    
2002
static void bdrv_file_init(void)
2003
{
2004
    /*
2005
     * Register all the drivers.  Note that order is important, the driver
2006
     * registered last will get probed first.
2007
     */
2008
    bdrv_register(&bdrv_file);
2009
    bdrv_register(&bdrv_host_device);
2010
#ifdef __linux__
2011
    bdrv_register(&bdrv_host_floppy);
2012
    bdrv_register(&bdrv_host_cdrom);
2013
#endif
2014
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
2015
    bdrv_register(&bdrv_host_cdrom);
2016
#endif
2017
}
2018

    
2019
block_init(bdrv_file_init);