Statistics
| Branch: | Revision:

root / block / raw-posix.c @ 9f8540ec

History | View | Annotate | Download (45.1 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "qemu-char.h"
27
#include "qemu-log.h"
28
#include "block_int.h"
29
#include "module.h"
30
#include "trace.h"
31
#include "thread-pool.h"
32
#include "iov.h"
33
#include "raw-aio.h"
34

    
35
#if defined(__APPLE__) && (__MACH__)
36
#include <paths.h>
37
#include <sys/param.h>
38
#include <IOKit/IOKitLib.h>
39
#include <IOKit/IOBSD.h>
40
#include <IOKit/storage/IOMediaBSDClient.h>
41
#include <IOKit/storage/IOMedia.h>
42
#include <IOKit/storage/IOCDMedia.h>
43
//#include <IOKit/storage/IOCDTypes.h>
44
#include <CoreFoundation/CoreFoundation.h>
45
#endif
46

    
47
#ifdef __sun__
48
#define _POSIX_PTHREAD_SEMANTICS 1
49
#include <sys/dkio.h>
50
#endif
51
#ifdef __linux__
52
#include <sys/types.h>
53
#include <sys/stat.h>
54
#include <sys/ioctl.h>
55
#include <sys/param.h>
56
#include <linux/cdrom.h>
57
#include <linux/fd.h>
58
#include <linux/fs.h>
59
#endif
60
#ifdef CONFIG_FIEMAP
61
#include <linux/fiemap.h>
62
#endif
63
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
64
#include <sys/disk.h>
65
#include <sys/cdio.h>
66
#endif
67

    
68
#ifdef __OpenBSD__
69
#include <sys/ioctl.h>
70
#include <sys/disklabel.h>
71
#include <sys/dkio.h>
72
#endif
73

    
74
#ifdef __NetBSD__
75
#include <sys/ioctl.h>
76
#include <sys/disklabel.h>
77
#include <sys/dkio.h>
78
#include <sys/disk.h>
79
#endif
80

    
81
#ifdef __DragonFly__
82
#include <sys/ioctl.h>
83
#include <sys/diskslice.h>
84
#endif
85

    
86
#ifdef CONFIG_XFS
87
#include <xfs/xfs.h>
88
#endif
89

    
90
//#define DEBUG_FLOPPY
91

    
92
//#define DEBUG_BLOCK
93
/*
 * Optional debug tracing.  With DEBUG_BLOCK defined, DEBUG_BLOCK_PRINT()
 * forwards its arguments to qemu_log() and flushes the log, but only when
 * qemu_log_enabled() is true.  Otherwise the macro expands to nothing.
 */
#if defined(DEBUG_BLOCK)
#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
    { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
#else
#define DEBUG_BLOCK_PRINT(formatCstr, ...)
#endif

/*
 * OS X does not have O_DSYNC: fall back to O_SYNC, then O_FSYNC, when the
 * platform headers provide one of them.
 */
#ifndef O_DSYNC
#ifdef O_SYNC
#define O_DSYNC O_SYNC
#elif defined(O_FSYNC)
#define O_DSYNC O_FSYNC
#endif
#endif

/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
#ifndef O_DIRECT
#define O_DIRECT O_DSYNC
#endif

/* Values stored in BDRVRawState.type */
#define FTYPE_FILE   0
#define FTYPE_CD     1
#define FTYPE_FD     2

/* if the FD is not accessed during that time (in ns), we try to
   reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT (1000000000)

#define MAX_BLOCKSIZE        4096
123

    
124
/*
 * Per-BlockDriverState private data of the raw POSIX driver
 * (reached through bs->opaque).
 */
typedef struct BDRVRawState {
    int fd;             /* open descriptor of the image/device, -1 if none */
    int type;           /* one of the FTYPE_* constants */
    int open_flags;     /* flags that were handed to qemu_open() */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* non-zero: raw_set_aio() selected Linux native AIO */
    void *aio_ctx;      /* context returned by laio_init(), NULL until set */
#endif
#ifdef CONFIG_XFS
    bool is_xfs : 1;    /* set when platform_test_xfs_fd() accepted the fd */
#endif
} BDRVRawState;
143

    
144
/*
 * State staged by raw_reopen_prepare() (stored in BDRVReopenState.opaque)
 * and later either adopted by raw_reopen_commit() or discarded by
 * raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;             /* descriptor opened/duplicated for the new flags */
    int open_flags;     /* open(2)-style flags computed for the reopen */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* use_aio value to install on commit */
#endif
} BDRVRawReopenState;
151

    
152
static int fd_open(BlockDriverState *bs);
153
static int64_t raw_getlength(BlockDriverState *bs);
154

    
155
typedef struct RawPosixAIOData {
156
    BlockDriverState *bs;
157
    int aio_fildes;
158
    union {
159
        struct iovec *aio_iov;
160
        void *aio_ioctl_buf;
161
    };
162
    int aio_niov;
163
    size_t aio_nbytes;
164
#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
165
    off_t aio_offset;
166
    int aio_type;
167
} RawPosixAIOData;
168

    
169
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
170
static int cdrom_reopen(BlockDriverState *bs);
171
#endif
172

    
173
#if defined(__NetBSD__)
/*
 * NetBSD: if *filename names a block device, rewrite it to the matching
 * raw device by prefixing the last path component with 'r'
 * (e.g. "/dev/sd0a" -> "/dev/rsd0a").
 *
 * On success *filename may point at a static buffer, so the result is only
 * valid until the next call.
 *
 * Returns 0 on success (including when the path is not a block device and
 * is left untouched), or a negative errno value if lstat() fails or the
 * rewritten path does not fit in PATH_MAX.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;
    int len;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* Capture errno first: fprintf()/strerror() may overwrite it. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
            fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        len = snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        len = snprintf(namebuf, PATH_MAX, "%.*s/r%s",
            (int)(dp - fname), fname, dp + 1);
    }
    /* Refuse to continue with a truncated (hence different) device path. */
    if (len < 0 || len >= PATH_MAX) {
        return -ENAMETOOLONG;
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/* Other hosts: the path is used as given; always succeeds. */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
210

    
211
static void raw_parse_flags(int bdrv_flags, int *open_flags)
212
{
213
    assert(open_flags != NULL);
214

    
215
    *open_flags |= O_BINARY;
216
    *open_flags &= ~O_ACCMODE;
217
    if (bdrv_flags & BDRV_O_RDWR) {
218
        *open_flags |= O_RDWR;
219
    } else {
220
        *open_flags |= O_RDONLY;
221
    }
222

    
223
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
224
     * and O_DIRECT for no caching. */
225
    if ((bdrv_flags & BDRV_O_NOCACHE)) {
226
        *open_flags |= O_DIRECT;
227
    }
228
}
229

    
230
#ifdef CONFIG_LINUX_AIO
231
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
232
{
233
    int ret = -1;
234
    assert(aio_ctx != NULL);
235
    assert(use_aio != NULL);
236
    /*
237
     * Currently Linux do AIO only for files opened with O_DIRECT
238
     * specified so check NOCACHE flag too
239
     */
240
    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
241
                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
242

    
243
        /* if non-NULL, laio_init() has already been run */
244
        if (*aio_ctx == NULL) {
245
            *aio_ctx = laio_init();
246
            if (!*aio_ctx) {
247
                goto error;
248
            }
249
        }
250
        *use_aio = 1;
251
    } else {
252
        *use_aio = 0;
253
    }
254

    
255
    ret = 0;
256

    
257
error:
258
    return ret;
259
}
260
#endif
261

    
262
/*
 * Common open path of the raw POSIX driver.
 *
 * Normalises the device path, computes the final open flags from
 * @open_flags and @bdrv_flags, opens @filename and stores the descriptor
 * and flags in the BDRVRawState behind bs->opaque.  With CONFIG_LINUX_AIO
 * the native AIO state is set up as well; with CONFIG_XFS the descriptor
 * is probed for XFS.
 *
 * Returns 0 on success or a negative errno value.  EROFS from the open is
 * reported as -EACCES.  On failure s->fd is -1 and no descriptor is left
 * open.
 */
static int raw_open_common(BlockDriverState *bs, const char *filename,
                           int bdrv_flags, int open_flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, ret;

    ret = raw_normalize_devicepath(&filename);
    if (ret != 0) {
        return ret;
    }

    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

    s->fd = -1;
    fd = qemu_open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS) {
            ret = -EACCES;
        }
        return ret;
    }
    s->fd = fd;

#ifdef CONFIG_LINUX_AIO
    /*
     * raw_set_aio() only reports -1; it is not guaranteed to set errno.
     * Clear errno first, read it before qemu_close() can overwrite it,
     * and never report 0 (success) for a failed setup.
     */
    errno = 0;
    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
        ret = errno ? -errno : -EIO;
        qemu_close(fd);
        s->fd = -1;     /* do not leave a closed descriptor behind */
        return ret;
    }
#endif

#ifdef CONFIG_XFS
    if (platform_test_xfs_fd(s->fd)) {
        s->is_xfs = 1;
    }
#endif

    return 0;
}
301

    
302
/*
 * Open @filename as a plain file image: marks the state as FTYPE_FILE and
 * delegates to raw_open_common() with no extra open flags.
 * Returns whatever raw_open_common() returns.
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *raw = bs->opaque;
    int rc;

    raw->type = FTYPE_FILE;
    rc = raw_open_common(bs, filename, flags, 0);
    return rc;
}
309

    
310
static int raw_reopen_prepare(BDRVReopenState *state,
311
                              BlockReopenQueue *queue, Error **errp)
312
{
313
    BDRVRawState *s;
314
    BDRVRawReopenState *raw_s;
315
    int ret = 0;
316

    
317
    assert(state != NULL);
318
    assert(state->bs != NULL);
319

    
320
    s = state->bs->opaque;
321

    
322
    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
323
    raw_s = state->opaque;
324

    
325
#ifdef CONFIG_LINUX_AIO
326
    raw_s->use_aio = s->use_aio;
327

    
328
    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
329
     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
330
     * won't override aio_ctx if aio_ctx is non-NULL */
331
    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
332
        return -1;
333
    }
334
#endif
335

    
336
    raw_parse_flags(state->flags, &raw_s->open_flags);
337

    
338
    raw_s->fd = -1;
339

    
340
    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
341
#ifdef O_NOATIME
342
    fcntl_flags |= O_NOATIME;
343
#endif
344

    
345
    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
346
        /* dup the original fd */
347
        /* TODO: use qemu fcntl wrapper */
348
#ifdef F_DUPFD_CLOEXEC
349
        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
350
#else
351
        raw_s->fd = dup(s->fd);
352
        if (raw_s->fd != -1) {
353
            qemu_set_cloexec(raw_s->fd);
354
        }
355
#endif
356
        if (raw_s->fd >= 0) {
357
            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
358
            if (ret) {
359
                qemu_close(raw_s->fd);
360
                raw_s->fd = -1;
361
            }
362
        }
363
    }
364

    
365
    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
366
    if (raw_s->fd == -1) {
367
        assert(!(raw_s->open_flags & O_CREAT));
368
        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
369
        if (raw_s->fd == -1) {
370
            ret = -1;
371
        }
372
    }
373
    return ret;
374
}
375

    
376

    
377
/*
 * Make the reopen staged by raw_reopen_prepare() effective: the old
 * descriptor is closed and replaced by the staged one, the staged open
 * flags (and use_aio, with CONFIG_LINUX_AIO) are installed, and the
 * staging structure is freed.
 */
static void raw_reopen_commit(BDRVReopenState *state)
{
    BDRVRawState *s = state->bs->opaque;
    BDRVRawReopenState *staged = state->opaque;

    qemu_close(s->fd);
    s->fd = staged->fd;
    s->open_flags = staged->open_flags;
#ifdef CONFIG_LINUX_AIO
    s->use_aio = staged->use_aio;
#endif

    g_free(staged);
    state->opaque = NULL;
}
393

    
394

    
395
/*
 * Throw away a reopen staged by raw_reopen_prepare(): close the staged
 * descriptor if there is one and free the staging structure.  A NULL
 * state->opaque means prepare never got far enough to allocate anything.
 */
static void raw_reopen_abort(BDRVReopenState *state)
{
    BDRVRawReopenState *staged = state->opaque;

    if (!staged) {
        return;
    }

    if (staged->fd >= 0) {
        qemu_close(staged->fd);
        staged->fd = -1;
    }

    g_free(staged);
    state->opaque = NULL;
}
411

    
412

    
413
/* XXX: use host sector size if necessary with:
#ifdef DIOCGSECTORSIZE
        {
            unsigned int sectorsize = 512;
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
                sectorsize > bufsize)
                bufsize = sectorsize;
        }
#endif
#ifdef CONFIG_COCOA
        uint32_t blockSize = 512;
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
            bufsize = blockSize;
        }
#endif
*/
429

    
430
/*
431
 * Check if all memory in this vector is sector aligned.
432
 */
433
static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
434
{
435
    int i;
436

    
437
    for (i = 0; i < qiov->niov; i++) {
438
        if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
439
            return 0;
440
        }
441
    }
442

    
443
    return 1;
444
}
445

    
446
/*
 * Execute the ioctl described by @aiocb (command in aio_ioctl_cmd,
 * argument in aio_ioctl_buf) on aio_fildes.
 *
 * Returns -errno if ioctl() reports -1.  On success the return value is
 * aio_nbytes, i.e. the ioctl command itself (aio_ioctl_cmd is an alias of
 * aio_nbytes).
 */
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
    if (ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd,
              aiocb->aio_ioctl_buf) == -1) {
        return -errno;
    }

    /*
     * This looks weird, but the aio code only considers a request
     * successful if it has written the full number of bytes.
     *
     * aio_nbytes is overloaded as aio_ioctl_cmd for ioctl requests, so
     * returning it hands back the ioctl command, which is what the
     * completion side (posix_aio_read(), per the original note) expects.
     */
    return aiocb->aio_nbytes;
}
465

    
466
/*
 * Flush the request's descriptor with qemu_fdatasync().
 * Returns 0 on success, -errno if qemu_fdatasync() reports -1.
 */
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
{
    if (qemu_fdatasync(aiocb->aio_fildes) == -1) {
        return -errno;
    }
    return 0;
}
476

    
477
#ifdef CONFIG_PREADV

/*
 * preadv()/pwritev() are available at build time.  handle_aiocb_rw()
 * clears this flag at run time when the vectored path did not complete
 * the request.
 */
static bool preadv_present = true;

/* Thin wrapper around preadv(); same arguments and return value. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t got = preadv(fd, iov, nr_iov, offset);

    return got;
}

/* Thin wrapper around pwritev(); same arguments and return value. */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t put = pwritev(fd, iov, nr_iov, offset);

    return put;
}

#else

/* No preadv()/pwritev() on this host: callers use the linear path. */
static bool preadv_present = false;

/* Stub: vectored reads are unsupported; always returns -ENOSYS. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

/* Stub: vectored writes are unsupported; always returns -ENOSYS. */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

#endif
510

    
511
/*
 * Perform the request with a single vectored call: qemu_pwritev() when
 * QEMU_AIO_WRITE is set in aio_type, qemu_preadv() otherwise.  The call is
 * repeated while it fails with EINTR.
 *
 * Returns the value of the vectored call, or -errno if that value is -1.
 */
static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
{
    ssize_t done;

    for (;;) {
        if (aiocb->aio_type & QEMU_AIO_WRITE) {
            done = qemu_pwritev(aiocb->aio_fildes, aiocb->aio_iov,
                                aiocb->aio_niov, aiocb->aio_offset);
        } else {
            done = qemu_preadv(aiocb->aio_fildes, aiocb->aio_iov,
                               aiocb->aio_niov, aiocb->aio_offset);
        }
        if (done != -1 || errno != EINTR) {
            break;
        }
    }

    return done == -1 ? -errno : done;
}
533

    
534
/*
535
 * Read/writes the data to/from a given linear buffer.
536
 *
537
 * Returns the number of bytes handles or -errno in case of an error. Short
538
 * reads are only returned if the end of the file is reached.
539
 */
540
static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
541
{
542
    ssize_t offset = 0;
543
    ssize_t len;
544

    
545
    while (offset < aiocb->aio_nbytes) {
546
        if (aiocb->aio_type & QEMU_AIO_WRITE) {
547
            len = pwrite(aiocb->aio_fildes,
548
                         (const char *)buf + offset,
549
                         aiocb->aio_nbytes - offset,
550
                         aiocb->aio_offset + offset);
551
        } else {
552
            len = pread(aiocb->aio_fildes,
553
                        buf + offset,
554
                        aiocb->aio_nbytes - offset,
555
                        aiocb->aio_offset + offset);
556
        }
557
        if (len == -1 && errno == EINTR) {
558
            continue;
559
        } else if (len == -1) {
560
            offset = -errno;
561
            break;
562
        } else if (len == 0) {
563
            break;
564
        }
565
        offset += len;
566
    }
567

    
568
    return offset;
569
}
570

    
571
/*
 * Carry out a read or write request.
 *
 * Aligned requests (QEMU_AIO_MISALIGNED clear) with one segment go
 * straight to handle_aiocb_rw_linear(); aligned multi-segment requests try
 * the vectored path first.  Everything else, including a vectored attempt
 * that came back short or with -ENOSYS, is served through a single bounce
 * buffer obtained from qemu_blockalign().
 *
 * Returns the number of bytes transferred or a negative errno value.
 */
static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
{
    ssize_t nbytes;
    char *bounce;
    char *cursor;
    int i;

    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1) {
            return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
        }

        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            if (nbytes == aiocb->aio_nbytes ||
                (nbytes < 0 && nbytes != -ENOSYS)) {
                return nbytes;
            }
            /*
             * NOTE(review): this is reached for short transfers as well as
             * for -ENOSYS, so one short read/write disables the vectored
             * path for all later requests.
             */
            preadv_present = false;
        }

        /*
         * XXX(hch): short read/write.  no easy way to handle the remainder
         * using these interfaces.  For now retry using plain
         * pread/pwrite?
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    bounce = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);

    if (aiocb->aio_type & QEMU_AIO_WRITE) {
        /* Gather the caller's segments into the bounce buffer. */
        cursor = bounce;
        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(cursor, aiocb->aio_iov[i].iov_base,
                   aiocb->aio_iov[i].iov_len);
            cursor += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, bounce);

    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
        /* Scatter up to aio_nbytes bytes back into the caller's segments. */
        size_t remaining = aiocb->aio_nbytes;

        cursor = bounce;
        for (i = 0; i < aiocb->aio_niov && remaining; ++i) {
            size_t seg_len = aiocb->aio_iov[i].iov_len;
            size_t copy = remaining > seg_len ? seg_len : remaining;

            memcpy(aiocb->aio_iov[i].iov_base, cursor, copy);
            cursor    += copy;
            remaining -= copy;
        }
    }
    qemu_vfree(bounce);

    return nbytes;
}
641

    
642
static int aio_worker(void *arg)
643
{
644
    RawPosixAIOData *aiocb = arg;
645
    ssize_t ret = 0;
646

    
647
    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
648
    case QEMU_AIO_READ:
649
        ret = handle_aiocb_rw(aiocb);
650
        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
651
            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
652
                      0, aiocb->aio_nbytes - ret);
653

    
654
            ret = aiocb->aio_nbytes;
655
        }
656
        if (ret == aiocb->aio_nbytes) {
657
            ret = 0;
658
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
659
            ret = -EINVAL;
660
        }
661
        break;
662
    case QEMU_AIO_WRITE:
663
        ret = handle_aiocb_rw(aiocb);
664
        if (ret == aiocb->aio_nbytes) {
665
            ret = 0;
666
        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
667
            ret = -EINVAL;
668
        }
669
        break;
670
    case QEMU_AIO_FLUSH:
671
        ret = handle_aiocb_flush(aiocb);
672
        break;
673
    case QEMU_AIO_IOCTL:
674
        ret = handle_aiocb_ioctl(aiocb);
675
        break;
676
    default:
677
        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
678
        ret = -EINVAL;
679
        break;
680
    }
681

    
682
    g_slice_free(RawPosixAIOData, aiocb);
683
    return ret;
684
}
685

    
686
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
687
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
688
        BlockDriverCompletionFunc *cb, void *opaque, int type)
689
{
690
    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
691

    
692
    acb->bs = bs;
693
    acb->aio_type = type;
694
    acb->aio_fildes = fd;
695

    
696
    if (qiov) {
697
        acb->aio_iov = qiov->iov;
698
        acb->aio_niov = qiov->niov;
699
    }
700
    acb->aio_nbytes = nb_sectors * 512;
701
    acb->aio_offset = sector_num * 512;
702

    
703
    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
704
    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
705
}
706

    
707
static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
708
        unsigned long int req, void *buf,
709
        BlockDriverCompletionFunc *cb, void *opaque)
710
{
711
    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
712

    
713
    acb->bs = bs;
714
    acb->aio_type = QEMU_AIO_IOCTL;
715
    acb->aio_fildes = fd;
716
    acb->aio_offset = 0;
717
    acb->aio_ioctl_buf = buf;
718
    acb->aio_ioctl_cmd = req;
719

    
720
    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
721
}
722

    
723
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
724
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
725
        BlockDriverCompletionFunc *cb, void *opaque, int type)
726
{
727
    BDRVRawState *s = bs->opaque;
728

    
729
    if (fd_open(bs) < 0)
730
        return NULL;
731

    
732
    /*
733
     * If O_DIRECT is used the buffer needs to be aligned on a sector
734
     * boundary.  Check if this is the case or tell the low-level
735
     * driver that it needs to copy the buffer.
736
     */
737
    if ((bs->open_flags & BDRV_O_NOCACHE)) {
738
        if (!qiov_is_aligned(bs, qiov)) {
739
            type |= QEMU_AIO_MISALIGNED;
740
#ifdef CONFIG_LINUX_AIO
741
        } else if (s->use_aio) {
742
            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
743
                               nb_sectors, cb, opaque, type);
744
#endif
745
        }
746
    }
747

    
748
    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
749
                       cb, opaque, type);
750
}
751

    
752
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
753
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
754
        BlockDriverCompletionFunc *cb, void *opaque)
755
{
756
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
757
                          cb, opaque, QEMU_AIO_READ);
758
}
759

    
760
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
761
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
762
        BlockDriverCompletionFunc *cb, void *opaque)
763
{
764
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
765
                          cb, opaque, QEMU_AIO_WRITE);
766
}
767

    
768
static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
769
        BlockDriverCompletionFunc *cb, void *opaque)
770
{
771
    BDRVRawState *s = bs->opaque;
772

    
773
    if (fd_open(bs) < 0)
774
        return NULL;
775

    
776
    return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
777
}
778

    
779
/*
 * Close the driver's descriptor, if one is open, and mark it closed (-1).
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->fd < 0) {
        return;
    }

    qemu_close(s->fd);
    s->fd = -1;
}
787

    
788
/*
 * Resize the image to @offset bytes.
 *
 * Regular files are resized with ftruncate().  Character and block devices
 * cannot be resized: the call succeeds as long as @offset does not exceed
 * raw_getlength().  Any other file type yields -ENOTSUP.
 *
 * Returns 0 on success, -errno from fstat()/ftruncate(), -EINVAL when a
 * device is too small, or -ENOTSUP.
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *s = bs->opaque;
    struct stat st;

    if (fstat(s->fd, &st)) {
        return -errno;
    }

    if (S_ISREG(st.st_mode)) {
        return ftruncate(s->fd, offset) < 0 ? -errno : 0;
    }

    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        return offset > raw_getlength(bs) ? -EINVAL : 0;
    }

    return -ENOTSUP;
}
811

    
812
#ifdef __OpenBSD__
813
/*
 * OpenBSD: return the size in bytes of the opened image.
 *
 * For character/block devices the size is taken from the disklabel
 * (sector size times the size of the partition selected by the device
 * number); for anything else st_size is used.
 *
 * Returns the length, or -errno if fstat() or the DIOCGDINFO ioctl fails
 * (previously a bare -1 was returned).
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    struct stat st;

    if (fstat(fd, &st)) {
        return -errno;
    }
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct disklabel dl;

        if (ioctl(fd, DIOCGDINFO, &dl)) {
            return -errno;
        }
        return (uint64_t)dl.d_secsize *
            dl.d_partitions[DISKPART(st.st_rdev)].p_size;
    } else {
        return st.st_size;
    }
}
831
#elif defined(__NetBSD__)
832
/*
 * NetBSD: return the size in bytes of the opened image.
 *
 * For character/block devices the wedge information is tried first
 * (dkw_size times 512); if that ioctl fails the disklabel is used (sector
 * size times the size of the partition selected by the device number).
 * For anything else st_size is used.
 *
 * Returns the length, or -errno if fstat() or the DIOCGDINFO ioctl fails
 * (previously a bare -1 was returned).
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    struct stat st;

    if (fstat(fd, &st)) {
        return -errno;
    }
    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        struct dkwedge_info dkw;

        if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
            return dkw.dkw_size * 512;
        } else {
            struct disklabel dl;

            if (ioctl(fd, DIOCGDINFO, &dl)) {
                return -errno;
            }
            return (uint64_t)dl.d_secsize *
                dl.d_partitions[DISKPART(st.st_rdev)].p_size;
        }
    } else {
        return st.st_size;
    }
}
856
#elif defined(__sun__)
857
/*
 * Solaris: return the size in bytes of the opened image.
 *
 * The DKIOCGMEDIAINFO ioctl is tried first (logical block size times
 * capacity); if it fails, the size is determined with lseek(SEEK_END).
 *
 * Returns the length, the negative value from fd_open() if that fails, or
 * -errno if lseek() fails (previously a bare -1 was returned).
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct dk_minfo minfo;
    int64_t size;
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

    /*
     * Use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
    if (ret != -1) {
        return minfo.dki_lbsize * minfo.dki_capacity;
    }

    /*
     * There are reports that lseek on some devices fails, but
     * irc discussion said that contingency on contingency was overkill.
     */
    size = lseek(s->fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}
882
#elif defined(CONFIG_BSD)
883
/*
 * BSD family (CONFIG_BSD): return the size in bytes of the opened image.
 *
 * When fstat() succeeds and the S_IFCHR bit is set in st_mode, the media
 * size is queried with DIOCGMEDIASIZE or DIOCGPART where available, with
 * lseek(SEEK_END) (LONG_LONG_MAX on Mac OS X) as the fallback.  Everything
 * else uses lseek(SEEK_END).
 *
 * On FreeBSD, a CD drive reporting no medium is reopened once through
 * cdrom_reopen() and the query is repeated.
 *
 * Returns the length, the negative value from fd_open() if that fails, or
 * -errno if the plain lseek() fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
    struct stat sb;
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
    int reopened = 0;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }
    /* Read the descriptor only after fd_open() had a chance to change it. */
    fd = s->fd;

#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
                struct partinfo pi;
                if (ioctl(fd, DIOCGPART, &pi) == 0)
                        size = pi.media_size;
                else
                        size = 0;
        }
        if (size == 0)
#endif
        /* Fallback; guarded by the condition above when one was compiled. */
#if defined(__APPLE__) && defined(__MACH__)
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
        case FTYPE_CD:
            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
            if (size == 2048LL * (unsigned)-1)
                size = 0;
            /* XXX no disc?  maybe we need to reopen... */
            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
                reopened = 1;
                /*
                 * cdrom_reopen() presumably replaces s->fd (TODO confirm);
                 * do not keep querying the stale descriptor.
                 */
                fd = s->fd;
                goto again;
            }
        }
#endif
    } else {
        size = lseek(fd, 0, SEEK_END);
        if (size < 0) {
            return -errno;
        }
    }
    return size;
}
937
#else
938
/*
 * Generic variant: return the size in bytes of the opened image as
 * reported by lseek(SEEK_END).
 *
 * Returns the length, the negative value from fd_open() if that fails, or
 * -errno if lseek() fails (previously a bare -1 was returned).
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int64_t size;
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

    size = lseek(s->fd, 0, SEEK_END);
    if (size < 0) {
        return -errno;
    }
    return size;
}
950
#endif
951

    
952
/*
 * Return the number of bytes actually allocated for the image, computed
 * as st_blocks * 512 from fstat(), or -errno if fstat() fails.
 */
static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct stat info;
    int64_t blocks;

    if (fstat(s->fd, &info) < 0) {
        return -errno;
    }

    blocks = (int64_t)info.st_blocks;
    return blocks * 512;
}
962

    
963
/*
 * Create (or truncate) the image file @filename.
 *
 * The size is taken from the BLOCK_OPT_SIZE option and rounded down to a
 * whole number of BDRV_SECTOR_SIZE sectors; without that option the file
 * is created empty.  The file is opened with mode 0644.
 *
 * Returns 0 on success or -errno from qemu_open(), ftruncate() or
 * qemu_close().  If both ftruncate() and qemu_close() fail, the error of
 * qemu_close() is the one reported.
 */
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
    int64_t total_size = 0;
    int result = 0;
    int fd;

    /* Read out options */
    for (; options && options->name; options++) {
        if (strcmp(options->name, BLOCK_OPT_SIZE) == 0) {
            total_size = options->value.n / BDRV_SECTOR_SIZE;
        }
    }

    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                   0644);
    if (fd < 0) {
        return -errno;
    }

    if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
        result = -errno;
    }
    if (qemu_close(fd) != 0) {
        result = -errno;
    }
    return result;
}
991

    
992
/*
993
 * Returns true iff the specified sector is present in the disk image. Drivers
994
 * not implementing the functionality are assumed to not support backing files,
995
 * hence all their sectors are reported as allocated.
996
 *
997
 * If 'sector_num' is beyond the end of the disk image the return value is 0
998
 * and 'pnum' is set to 0.
999
 *
1000
 * 'pnum' is set to the number of sectors (including and immediately following
1001
 * the specified sector) that are known to be in the same
1002
 * allocated/unallocated state.
1003
 *
1004
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
1005
 * beyond the end of the disk image it will be clamped.
1006
 */
1007
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
                                            int64_t sector_num,
                                            int nb_sectors, int *pnum)
{
    /*
     * 'start' is the byte offset of sector_num; 'data' and 'hole' are byte
     * offsets describing the extent around 'start' (see the final
     * comparison at the bottom of the function).
     */
    off_t start, data, hole;
    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
        return ret;
    }

    start = sector_num * BDRV_SECTOR_SIZE;

#ifdef CONFIG_FIEMAP

    BDRVRawState *s = bs->opaque;
    struct {
        struct fiemap fm;
        struct fiemap_extent fe;
    } f;

    /* Ask for at most one extent overlapping the requested range. */
    f.fm.fm_start = start;
    f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
    f.fm.fm_flags = 0;
    f.fm.fm_extent_count = 1;
    f.fm.fm_reserved = 0;
    if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
        /* Assume everything is allocated.  */
        *pnum = nb_sectors;
        return 1;
    }

    if (f.fm.fm_mapped_extents == 0) {
        /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
         * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
         */
        off_t length = lseek(s->fd, 0, SEEK_END);
        hole = f.fm.fm_start;
        data = MIN(f.fm.fm_start + f.fm.fm_length, length);
    } else {
        data = f.fe.fe_logical;
        hole = f.fe.fe_logical + f.fe.fe_length;
    }

#elif defined SEEK_HOLE && defined SEEK_DATA

    BDRVRawState *s = bs->opaque;

    hole = lseek(s->fd, start, SEEK_HOLE);
    if (hole == -1) {
        if (errno == ENXIO) {
            /* ENXIO indicates that sector_num was past the end of the file.
             * There is a virtual hole there: report "unallocated" with an
             * empty range.  (errno values are positive, so comparing
             * against -ENXIO could never match.)  */
            *pnum = 0;
            return 0;
        }

        /* Most likely EINVAL.  Assume everything is allocated.  */
        *pnum = nb_sectors;
        return 1;
    }

    if (hole > start) {
        data = start;
    } else {
        /* On a hole.  We need another syscall to find its end.  */
        data = lseek(s->fd, start, SEEK_DATA);
        if (data == -1) {
            /* No data after 'start': the hole extends to the end of file. */
            data = lseek(s->fd, 0, SEEK_END);
        }
    }
#else
    /* No way to query the allocation map: report everything as allocated. */
    *pnum = nb_sectors;
    return 1;
#endif

    if (data <= start) {
        /* On a data extent, compute sectors to the end of the extent.  */
        *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
        return 1;
    } else {
        /* On a hole, compute sectors to the beginning of the next extent.  */
        *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
        return 0;
    }
}
1091

    
1092
#ifdef CONFIG_XFS
1093
/*
 * Release the blocks backing 'nb_sectors' sectors starting at 'sector_num'
 * by issuing XFS_IOC_UNRESVSP64 on s->fd.
 *
 * Returns 0 on success or a negative errno value if xfsctl() fails.
 */
static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
{
    struct xfs_flock64 fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    /* Sector counts are converted to byte offsets/lengths (<< 9). */
    fl.l_start = sector_num << 9;
    fl.l_len = (int64_t)nb_sectors << 9;

    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
        /* Save errno first: the debug print below may overwrite it. */
        int err = errno;

        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(err));
        return -err;
    }

    return 0;
}
1109
#endif
1110

    
1111
/*
 * Discard callback for the "file" driver.  When built with CONFIG_XFS and
 * the image is flagged as living on XFS, the range is handed to
 * xfs_discard(); in every other case the request is accepted as a no-op.
 */
static coroutine_fn int raw_co_discard(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
#ifdef CONFIG_XFS
    BDRVRawState *raw = bs->opaque;

    return raw->is_xfs ? xfs_discard(raw, sector_num, nb_sectors) : 0;
#else
    return 0;
#endif
}
1124

    
1125
static QEMUOptionParameter raw_create_options[] = {
1126
    {
1127
        .name = BLOCK_OPT_SIZE,
1128
        .type = OPT_SIZE,
1129
        .help = "Virtual disk size"
1130
    },
1131
    { NULL }
1132
};
1133

    
1134
static BlockDriver bdrv_file = {
1135
    .format_name = "file",
1136
    .protocol_name = "file",
1137
    .instance_size = sizeof(BDRVRawState),
1138
    .bdrv_probe = NULL, /* no probe for protocols */
1139
    .bdrv_file_open = raw_open,
1140
    .bdrv_reopen_prepare = raw_reopen_prepare,
1141
    .bdrv_reopen_commit = raw_reopen_commit,
1142
    .bdrv_reopen_abort = raw_reopen_abort,
1143
    .bdrv_close = raw_close,
1144
    .bdrv_create = raw_create,
1145
    .bdrv_co_discard = raw_co_discard,
1146
    .bdrv_co_is_allocated = raw_co_is_allocated,
1147

    
1148
    .bdrv_aio_readv = raw_aio_readv,
1149
    .bdrv_aio_writev = raw_aio_writev,
1150
    .bdrv_aio_flush = raw_aio_flush,
1151

    
1152
    .bdrv_truncate = raw_truncate,
1153
    .bdrv_getlength = raw_getlength,
1154
    .bdrv_get_allocated_file_size
1155
                        = raw_get_allocated_file_size,
1156

    
1157
    .create_options = raw_create_options,
1158
};
1159

    
1160
/***********************************************/
1161
/* host device */
1162

    
1163
#if defined(__APPLE__) && defined(__MACH__)
1164
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1165
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1166

    
1167
/*
 * Look up all I/O Kit services of class kIOCDMediaClass whose
 * kIOMediaEjectableKey property is true.
 *
 * On success KERN_SUCCESS is returned and *mediaIterator holds the matching
 * iterator.  On any failure a diagnostic is printed, *mediaIterator is set
 * to IO_OBJECT_NULL and an error code is returned, so callers never see an
 * uninitialized iterator.
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    kern_return_t           kernResult;
    mach_port_t             masterPort;
    CFMutableDictionaryRef  classesToMatch;

    *mediaIterator = IO_OBJECT_NULL;

    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( KERN_SUCCESS != kernResult ) {
        printf( "IOMasterPort returned %d\n", kernResult );
        /* masterPort is not valid here; do not go on using it. */
        return kernResult;
    }

    classesToMatch = IOServiceMatching( kIOCDMediaClass );
    if ( classesToMatch == NULL ) {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
        /* Nothing to match against. */
        return KERN_FAILURE;
    }
    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );

    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
    if ( KERN_SUCCESS != kernResult ) {
        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
        *mediaIterator = IO_OBJECT_NULL;
    }

    return kernResult;
}
1192

    
1193
/*
 * Take the next entry from 'mediaIterator' and write its device path,
 * built as _PATH_DEV + "r" + <kIOBSDNameKey property>, into 'bsdPath'.
 *
 * 'maxPathSize' is the capacity of 'bsdPath' in bytes; the result is never
 * written beyond it.  Returns KERN_SUCCESS when a complete path was stored.
 * On every failure KERN_FAILURE is returned and 'bsdPath' is the empty
 * string (provided maxPathSize > 0), so a partial prefix is never left
 * behind for the caller to mistake for a usable path.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    io_object_t     nextMedia;
    kern_return_t   kernResult = KERN_FAILURE;

    if ( maxPathSize <= 0 ) {
        /* No room even for the terminating NUL. */
        return KERN_FAILURE;
    }
    *bsdPath = '\0';

    nextMedia = IOIteratorNext( mediaIterator );
    if ( nextMedia )
    {
        CFTypeRef   bsdPathAsCFString;

        bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
        if ( bsdPathAsCFString ) {
            /* Bounded replacement for strcpy(_PATH_DEV) + strcat("r"). */
            int devPathLength = snprintf( bsdPath, (size_t)maxPathSize, "%sr", _PATH_DEV );

            if ( devPathLength > 0 && devPathLength < maxPathSize &&
                 CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
                kernResult = KERN_SUCCESS;
            } else {
                /* Prefix truncated or name conversion failed: discard it. */
                *bsdPath = '\0';
            }
            CFRelease( bsdPathAsCFString );
        }
        IOObjectRelease( nextMedia );
    }

    return kernResult;
}
1218

    
1219
#endif
1220

    
1221
/*
 * Probe score for the host_device driver: 50 for names starting with
 * "/dev/cdrom", 100 for any path that stat()s as a character or block
 * device, 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
1236

    
1237
/*
 * Open callback of the host_device driver.
 *
 * On Mac OS X a name starting with "/dev/cdrom" is replaced by the device
 * path of the first ejectable CD found through the I/O Kit (partition
 * suffix "s0", or "s1" when "s0" cannot be opened).  On Linux, paths that
 * resolve to "/dev/sg*" set bs->sg.  The actual open is done by
 * raw_open_common(), whose result is returned.
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;

#if defined(__APPLE__) && defined(__MACH__)
    /*
     * Function scope on purpose: 'filename' may be pointed at this buffer
     * and is still used by raw_open_common() at the end of the function.
     */
    char bsdPath[ MAXPATHLEN ] = "";

    if (strstart(filename, "/dev/cdrom", NULL)) {
        kern_return_t kernResult;
        io_iterator_t mediaIterator = IO_OBJECT_NULL;
        int fd;

        kernResult = FindEjectableCDMedia( &mediaIterator );
        if ( kernResult == KERN_SUCCESS ) {
            /* Keep two bytes in reserve for the "s0"/"s1" suffix below. */
            kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) - 2 );
        }

        if ( kernResult == KERN_SUCCESS && bsdPath[ 0 ] != '\0' ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (fd < 0) {
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                qemu_close(fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator ) {
            IOObjectRelease( mediaIterator );
        }
    }
#endif

    s->type = FTYPE_FILE;
#if defined(__linux__)
    {
        char resolved_path[ MAXPATHLEN ], *temp;

        /* Follow symlinks so that aliases of /dev/sg* are recognised too. */
        temp = realpath(filename, resolved_path);
        if (temp && strstart(temp, "/dev/sg", NULL)) {
            bs->sg = 1;
        }
    }
#endif

    return raw_open_common(bs, filename, flags, 0);
}
1282

    
1283
#if defined(__linux__)
1284
/* Note: we do not have a reliable method to detect if the floppy is
1285
   present. The current method is to try to open the floppy at every
1286
   I/O and to keep it opened during a few hundreds of ms. */
1287
static int fd_open(BlockDriverState *bs)
1288
{
1289
    BDRVRawState *s = bs->opaque;
1290
    int last_media_present;
1291

    
1292
    if (s->type != FTYPE_FD)
1293
        return 0;
1294
    last_media_present = (s->fd >= 0);
1295
    if (s->fd >= 0 &&
1296
        (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1297
        qemu_close(s->fd);
1298
        s->fd = -1;
1299
#ifdef DEBUG_FLOPPY
1300
        printf("Floppy closed\n");
1301
#endif
1302
    }
1303
    if (s->fd < 0) {
1304
        if (s->fd_got_error &&
1305
            (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1306
#ifdef DEBUG_FLOPPY
1307
            printf("No floppy (open delayed)\n");
1308
#endif
1309
            return -EIO;
1310
        }
1311
        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
1312
        if (s->fd < 0) {
1313
            s->fd_error_time = get_clock();
1314
            s->fd_got_error = 1;
1315
            if (last_media_present)
1316
                s->fd_media_changed = 1;
1317
#ifdef DEBUG_FLOPPY
1318
            printf("No floppy\n");
1319
#endif
1320
            return -EIO;
1321
        }
1322
#ifdef DEBUG_FLOPPY
1323
        printf("Floppy opened\n");
1324
#endif
1325
    }
1326
    if (!last_media_present)
1327
        s->fd_media_changed = 1;
1328
    s->fd_open_time = get_clock();
1329
    s->fd_got_error = 0;
1330
    return 0;
1331
}
1332

    
1333
/* Forward request 'req' with argument 'buf' to ioctl() on the drive's fd. */
static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *raw = bs->opaque;
    int rc = ioctl(raw->fd, req, buf);

    return rc;
}
1339

    
1340
static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
1341
        unsigned long int req, void *buf,
1342
        BlockDriverCompletionFunc *cb, void *opaque)
1343
{
1344
    BDRVRawState *s = bs->opaque;
1345

    
1346
    if (fd_open(bs) < 0)
1347
        return NULL;
1348
    return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
1349
}
1350

    
1351
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
1352
/*
 * Sanity check used by the I/O paths: 0 when s->fd is a valid descriptor,
 * -EIO otherwise.
 */
static int fd_open(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;

    return (raw->fd >= 0) ? 0 : -EIO;
}
1361
#else /* !linux && !FreeBSD */
1362

    
1363
/* Fallback for other hosts: nothing to do, always reports success. */
static int fd_open(BlockDriverState *bs)
{
    (void)bs;

    return 0;
}
1367

    
1368
#endif /* !linux && !FreeBSD */
1369

    
1370
/*
 * "Create" an image on a host device: nothing is written, the function only
 * verifies that 'filename' can be opened for writing, is a block or
 * character device, and is at least as large as the requested size.
 *
 * Returns 0 on success, -errno if open/fstat/lseek fails, -ENODEV if the
 * path is not a device, -ENOSPC if the device is too small.
 */
static int hdev_create(const char *filename, QEMUOptionParameter *options)
{
    int fd;
    int ret = 0;
    struct stat stat_buf;
    int64_t total_size = 0;     /* requested size, in sectors */

    /* Read out options */
    while (options && options->name) {
        /* Same option name that raw_create_options advertises. */
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / BDRV_SECTOR_SIZE;
        }
        options++;
    }

    fd = qemu_open(filename, O_WRONLY | O_BINARY);
    if (fd < 0) {
        return -errno;
    }

    if (fstat(fd, &stat_buf) < 0) {
        ret = -errno;
    } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
        ret = -ENODEV;
    } else {
        off_t dev_size = lseek(fd, 0, SEEK_END);

        if (dev_size < 0) {
            /* A failed seek is an I/O error, not "device too small". */
            ret = -errno;
        } else if (dev_size < total_size * BDRV_SECTOR_SIZE) {
            ret = -ENOSPC;
        }
    }

    qemu_close(fd);
    return ret;
}
1399

    
1400
/* has_zero_init callback for the host device drivers: always answers 0. */
static int hdev_has_zero_init(BlockDriverState *bs)
{
    (void)bs;

    return 0;
}
1404

    
1405
static BlockDriver bdrv_host_device = {
1406
    .format_name        = "host_device",
1407
    .protocol_name        = "host_device",
1408
    .instance_size      = sizeof(BDRVRawState),
1409
    .bdrv_probe_device  = hdev_probe_device,
1410
    .bdrv_file_open     = hdev_open,
1411
    .bdrv_close         = raw_close,
1412
    .bdrv_create        = hdev_create,
1413
    .create_options     = raw_create_options,
1414
    .bdrv_has_zero_init = hdev_has_zero_init,
1415

    
1416
    .bdrv_aio_readv        = raw_aio_readv,
1417
    .bdrv_aio_writev        = raw_aio_writev,
1418
    .bdrv_aio_flush        = raw_aio_flush,
1419

    
1420
    .bdrv_truncate      = raw_truncate,
1421
    .bdrv_getlength        = raw_getlength,
1422
    .bdrv_get_allocated_file_size
1423
                        = raw_get_allocated_file_size,
1424

    
1425
    /* generic scsi device */
1426
#ifdef __linux__
1427
    .bdrv_ioctl         = hdev_ioctl,
1428
    .bdrv_aio_ioctl     = hdev_aio_ioctl,
1429
#endif
1430
};
1431

    
1432
#ifdef __linux__
1433
/*
 * Open callback of the host_floppy driver.  The device is opened once via
 * raw_open_common() and closed again straight away; fd_open() reopens it
 * on demand.  The drive starts out with the media-changed flag set.
 */
static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *raw = bs->opaque;
    int rc;

    raw->type = FTYPE_FD;

    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
    rc = raw_open_common(bs, filename, flags, O_NONBLOCK);
    if (rc != 0) {
        return rc;
    }

    /* close fd so that we can reopen it as needed */
    qemu_close(raw->fd);
    raw->fd = -1;
    raw->fd_media_changed = 1;

    return 0;
}
1452

    
1453
static int floppy_probe_device(const char *filename)
1454
{
1455
    int fd, ret;
1456
    int prio = 0;
1457
    struct floppy_struct fdparam;
1458
    struct stat st;
1459

    
1460
    if (strstart(filename, "/dev/fd", NULL) &&
1461
        !strstart(filename, "/dev/fdset/", NULL)) {
1462
        prio = 50;
1463
    }
1464

    
1465
    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
1466
    if (fd < 0) {
1467
        goto out;
1468
    }
1469
    ret = fstat(fd, &st);
1470
    if (ret == -1 || !S_ISBLK(st.st_mode)) {
1471
        goto outc;
1472
    }
1473

    
1474
    /* Attempt to detect via a floppy specific ioctl */
1475
    ret = ioctl(fd, FDGETPRM, &fdparam);
1476
    if (ret >= 0)
1477
        prio = 100;
1478

    
1479
outc:
1480
    qemu_close(fd);
1481
out:
1482
    return prio;
1483
}
1484

    
1485

    
1486
/* Media is considered present exactly when fd_open() succeeds. */
static int floppy_is_inserted(BlockDriverState *bs)
{
    int rc = fd_open(bs);

    return rc >= 0;
}
1490

    
1491
/*
 * Return the media-changed flag and clear it.  fd_open() is called first
 * so that the flag is brought up to date.
 */
static int floppy_media_changed(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;
    int changed;

    /*
     * XXX: we do not have a true media changed indication.
     * It does not work if the floppy is changed without trying to read it.
     */
    fd_open(bs);

    changed = raw->fd_media_changed;
    raw->fd_media_changed = 0;
#ifdef DEBUG_FLOPPY
    printf("Floppy changed=%d\n", changed);
#endif
    return changed;
}
1508

    
1509
/*
 * Eject the floppy: drop the cached descriptor, reopen the device
 * non-blocking and issue FDEJECT.  Failures to open are silently ignored;
 * an ioctl failure is reported with perror().  'eject_flag' is not
 * consulted.
 */
static void floppy_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *raw = bs->opaque;
    int tmp_fd;

    if (raw->fd >= 0) {
        qemu_close(raw->fd);
        raw->fd = -1;
    }

    tmp_fd = qemu_open(bs->filename, raw->open_flags | O_NONBLOCK);
    if (tmp_fd < 0) {
        return;
    }

    if (ioctl(tmp_fd, FDEJECT, 0) < 0) {
        perror("FDEJECT");
    }
    qemu_close(tmp_fd);
}
1525

    
1526
static BlockDriver bdrv_host_floppy = {
1527
    .format_name        = "host_floppy",
1528
    .protocol_name      = "host_floppy",
1529
    .instance_size      = sizeof(BDRVRawState),
1530
    .bdrv_probe_device        = floppy_probe_device,
1531
    .bdrv_file_open     = floppy_open,
1532
    .bdrv_close         = raw_close,
1533
    .bdrv_create        = hdev_create,
1534
    .create_options     = raw_create_options,
1535
    .bdrv_has_zero_init = hdev_has_zero_init,
1536

    
1537
    .bdrv_aio_readv     = raw_aio_readv,
1538
    .bdrv_aio_writev    = raw_aio_writev,
1539
    .bdrv_aio_flush        = raw_aio_flush,
1540

    
1541
    .bdrv_truncate      = raw_truncate,
1542
    .bdrv_getlength        = raw_getlength,
1543
    .bdrv_get_allocated_file_size
1544
                        = raw_get_allocated_file_size,
1545

    
1546
    /* removable device support */
1547
    .bdrv_is_inserted   = floppy_is_inserted,
1548
    .bdrv_media_changed = floppy_media_changed,
1549
    .bdrv_eject         = floppy_eject,
1550
};
1551

    
1552
/* Open callback of the Linux host_cdrom driver; marks the drive FTYPE_CD. */
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *raw = bs->opaque;
    int rc;

    raw->type = FTYPE_CD;

    /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
    rc = raw_open_common(bs, filename, flags, O_NONBLOCK);
    return rc;
}
1561

    
1562
static int cdrom_probe_device(const char *filename)
1563
{
1564
    int fd, ret;
1565
    int prio = 0;
1566
    struct stat st;
1567

    
1568
    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
1569
    if (fd < 0) {
1570
        goto out;
1571
    }
1572
    ret = fstat(fd, &st);
1573
    if (ret == -1 || !S_ISBLK(st.st_mode)) {
1574
        goto outc;
1575
    }
1576

    
1577
    /* Attempt to detect via a CDROM specific ioctl */
1578
    ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1579
    if (ret >= 0)
1580
        prio = 100;
1581

    
1582
outc:
1583
    qemu_close(fd);
1584
out:
1585
    return prio;
1586
}
1587

    
1588
/* Returns 1 when CDROM_DRIVE_STATUS reports CDS_DISC_OK, 0 otherwise. */
static int cdrom_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;
    int status = ioctl(raw->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

    return status == CDS_DISC_OK;
}
1598

    
1599
/*
 * Open the tray (eject_flag true, CDROMEJECT) or close it (eject_flag
 * false, CDROMCLOSETRAY).  A failing ioctl is reported with perror(),
 * labelled with the request that actually failed.
 */
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *s = bs->opaque;

    if (eject_flag) {
        if (ioctl(s->fd, CDROMEJECT, NULL) < 0) {
            perror("CDROMEJECT");
        }
    } else {
        if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) {
            perror("CDROMCLOSETRAY");
        }
    }
}
1611

    
1612
/* Lock or unlock the drive door through the CDROM_LOCKDOOR ioctl. */
static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
{
    BDRVRawState *raw = bs->opaque;

    /*
     * Note: an error can happen if the distribution automatically
     * mounts the CD-ROM, so the result is deliberately not reported.
     */
    (void) ioctl(raw->fd, CDROM_LOCKDOOR, locked);
}
1624

    
1625
static BlockDriver bdrv_host_cdrom = {
1626
    .format_name        = "host_cdrom",
1627
    .protocol_name      = "host_cdrom",
1628
    .instance_size      = sizeof(BDRVRawState),
1629
    .bdrv_probe_device        = cdrom_probe_device,
1630
    .bdrv_file_open     = cdrom_open,
1631
    .bdrv_close         = raw_close,
1632
    .bdrv_create        = hdev_create,
1633
    .create_options     = raw_create_options,
1634
    .bdrv_has_zero_init = hdev_has_zero_init,
1635

    
1636
    .bdrv_aio_readv     = raw_aio_readv,
1637
    .bdrv_aio_writev    = raw_aio_writev,
1638
    .bdrv_aio_flush        = raw_aio_flush,
1639

    
1640
    .bdrv_truncate      = raw_truncate,
1641
    .bdrv_getlength     = raw_getlength,
1642
    .bdrv_get_allocated_file_size
1643
                        = raw_get_allocated_file_size,
1644

    
1645
    /* removable device support */
1646
    .bdrv_is_inserted   = cdrom_is_inserted,
1647
    .bdrv_eject         = cdrom_eject,
1648
    .bdrv_lock_medium   = cdrom_lock_medium,
1649

    
1650
    /* generic scsi device */
1651
    .bdrv_ioctl         = hdev_ioctl,
1652
    .bdrv_aio_ioctl     = hdev_aio_ioctl,
1653
};
1654
#endif /* __linux__ */
1655

    
1656
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1657
/*
 * Open callback of the FreeBSD host_cdrom driver: marks the drive FTYPE_CD,
 * opens it through raw_open_common() and then issues CDIOCALLOW.
 */
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *raw = bs->opaque;
    int rc;

    raw->type = FTYPE_CD;

    rc = raw_open_common(bs, filename, flags, 0);
    if (rc != 0) {
        return rc;
    }

    /* make sure the door isn't locked at this time */
    ioctl(raw->fd, CDIOCALLOW);
    return 0;
}
1672

    
1673
/* Probe score: 100 for names starting with "/dev/cd" or "/dev/acd", else 0. */
static int cdrom_probe_device(const char *filename)
{
    int looks_like_cd = strstart(filename, "/dev/cd", NULL) ||
                        strstart(filename, "/dev/acd", NULL);

    return looks_like_cd ? 100 : 0;
}
1680

    
1681
/*
 * Close and reopen the CD device.  Returns 0 on success; on failure s->fd
 * is left at -1 and -EIO is returned.
 */
static int cdrom_reopen(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;
    int new_fd;

    /*
     * Force reread of possibly changed/newly loaded disc,
     * FreeBSD seems to not notice sometimes...
     */
    if (raw->fd >= 0) {
        qemu_close(raw->fd);
    }

    new_fd = qemu_open(bs->filename, raw->open_flags, 0644);
    raw->fd = (new_fd < 0) ? -1 : new_fd;
    if (new_fd < 0) {
        return -EIO;
    }

    /* make sure the door isn't locked at this time */
    ioctl(raw->fd, CDIOCALLOW);
    return 0;
}
1703

    
1704
/* A disc counts as inserted when raw_getlength() reports a positive size. */
static int cdrom_is_inserted(BlockDriverState *bs)
{
    return (raw_getlength(bs) > 0) ? 1 : 0;
}
1708

    
1709
/*
 * Open the tray (eject_flag true, CDIOCEJECT) or close it (eject_flag
 * false, CDIOCCLOSE) after issuing CDIOCALLOW, then reopen the device via
 * cdrom_reopen().  Does nothing when no descriptor is open.
 */
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
{
    BDRVRawState *raw = bs->opaque;
    unsigned long request;
    const char *label;

    if (raw->fd < 0) {
        return;
    }

    (void) ioctl(raw->fd, CDIOCALLOW);

    /* Pick the request and the matching perror() label together. */
    if (eject_flag) {
        request = CDIOCEJECT;
        label = "CDIOCEJECT";
    } else {
        request = CDIOCCLOSE;
        label = "CDIOCCLOSE";
    }
    if (ioctl(raw->fd, request) < 0) {
        perror(label);
    }

    cdrom_reopen(bs);
}
1728

    
1729
/*
 * Lock (CDIOCPREVENT) or unlock (CDIOCALLOW) the drive door.  Does nothing
 * when no descriptor is open.
 */
static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
{
    BDRVRawState *raw = bs->opaque;

    if (raw->fd < 0) {
        return;
    }

    /*
     * Note: an error can happen if the distribution automatically
     * mounts the CD-ROM, so the result is deliberately not reported.
     */
    (void) ioctl(raw->fd, (locked ? CDIOCPREVENT : CDIOCALLOW));
}
1743

    
1744
static BlockDriver bdrv_host_cdrom = {
1745
    .format_name        = "host_cdrom",
1746
    .protocol_name      = "host_cdrom",
1747
    .instance_size      = sizeof(BDRVRawState),
1748
    .bdrv_probe_device        = cdrom_probe_device,
1749
    .bdrv_file_open     = cdrom_open,
1750
    .bdrv_close         = raw_close,
1751
    .bdrv_create        = hdev_create,
1752
    .create_options     = raw_create_options,
1753
    .bdrv_has_zero_init = hdev_has_zero_init,
1754

    
1755
    .bdrv_aio_readv     = raw_aio_readv,
1756
    .bdrv_aio_writev    = raw_aio_writev,
1757
    .bdrv_aio_flush        = raw_aio_flush,
1758

    
1759
    .bdrv_truncate      = raw_truncate,
1760
    .bdrv_getlength     = raw_getlength,
1761
    .bdrv_get_allocated_file_size
1762
                        = raw_get_allocated_file_size,
1763

    
1764
    /* removable device support */
1765
    .bdrv_is_inserted   = cdrom_is_inserted,
1766
    .bdrv_eject         = cdrom_eject,
1767
    .bdrv_lock_medium   = cdrom_lock_medium,
1768
};
1769
#endif /* __FreeBSD__ */
1770

    
1771
static void bdrv_file_init(void)
1772
{
1773
    /*
1774
     * Register all the drivers.  Note that order is important, the driver
1775
     * registered last will get probed first.
1776
     */
1777
    bdrv_register(&bdrv_file);
1778
    bdrv_register(&bdrv_host_device);
1779
#ifdef __linux__
1780
    bdrv_register(&bdrv_host_floppy);
1781
    bdrv_register(&bdrv_host_cdrom);
1782
#endif
1783
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
1784
    bdrv_register(&bdrv_host_cdrom);
1785
#endif
1786
}
1787

    
1788
block_init(bdrv_file_init);