Statistics
| Branch: | Revision:

root / block-raw-posix.c @ 2acf5af0

History | View | Annotate | Download (30.9 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "qemu-char.h"
27
#include "block_int.h"
28
#include "compatfd.h"
29
#include <assert.h>
30
#ifdef CONFIG_AIO
31
#include <aio.h>
32
#endif
33

    
34
#ifdef CONFIG_COCOA
35
#include <paths.h>
36
#include <sys/param.h>
37
#include <IOKit/IOKitLib.h>
38
#include <IOKit/IOBSD.h>
39
#include <IOKit/storage/IOMediaBSDClient.h>
40
#include <IOKit/storage/IOMedia.h>
41
#include <IOKit/storage/IOCDMedia.h>
42
//#include <IOKit/storage/IOCDTypes.h>
43
#include <CoreFoundation/CoreFoundation.h>
44
#endif
45

    
46
#ifdef __sun__
47
#define _POSIX_PTHREAD_SEMANTICS 1
48
#include <signal.h>
49
#include <sys/dkio.h>
50
#endif
51
#ifdef __linux__
52
#include <sys/ioctl.h>
53
#include <linux/cdrom.h>
54
#include <linux/fd.h>
55
#endif
56
#ifdef __FreeBSD__
57
#include <signal.h>
58
#include <sys/disk.h>
59
#endif
60

    
61
#ifdef __OpenBSD__
62
#include <sys/ioctl.h>
63
#include <sys/disklabel.h>
64
#include <sys/dkio.h>
65
#endif
66

    
67
//#define DEBUG_FLOPPY
68

    
69
//#define DEBUG_BLOCK
70
#if defined(DEBUG_BLOCK)
71
#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0)        \
72
    { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
73
#else
74
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
75
#endif
76

    
77
#define FTYPE_FILE   0
78
#define FTYPE_CD     1
79
#define FTYPE_FD     2
80

    
81
#define ALIGNED_BUFFER_SIZE (32 * 512)
82

    
83
/* if the FD is not accessed during that time (in ms), we try to
84
   reopen it to see if the disk has been changed */
85
#define FD_OPEN_TIMEOUT 1000
86

    
87
typedef struct BDRVRawState {
88
    int fd;
89
    int type;
90
    unsigned int lseek_err_cnt;
91
#if defined(__linux__)
92
    /* linux floppy specific */
93
    int fd_open_flags;
94
    int64_t fd_open_time;
95
    int64_t fd_error_time;
96
    int fd_got_error;
97
    int fd_media_changed;
98
#endif
99
#if defined(O_DIRECT)
100
    uint8_t* aligned_buf;
101
#endif
102
} BDRVRawState;
103

    
104
static int fd_open(BlockDriverState *bs);
105

    
106
/*
 * Open 'filename' as a raw image.
 *
 * 'flags' is a BDRV_O_* mask: the access mode selects O_RDWR or O_RDONLY
 * (the latter also marks bs read-only), BDRV_O_CREAT adds O_CREAT|O_TRUNC
 * and, where the host has O_DIRECT, BDRV_O_DIRECT enables it and allocates
 * the 512-byte aligned bounce buffer.
 *
 * Returns 0 on success or a negative errno value (EROFS is reported as
 * EACCES).
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;

    s->lseek_err_cnt = 0;

    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    if (flags & BDRV_O_CREAT)
        open_flags |= O_CREAT | O_TRUNC;
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT)
        open_flags |= O_DIRECT;
#endif

    s->type = FTYPE_FILE;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
#if defined(O_DIRECT)
    s->aligned_buf = NULL;
    if (flags & BDRV_O_DIRECT) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            /* The allocator is not guaranteed to set errno; never let a
               zero errno turn this failure into a "success" return. */
            ret = errno ? -errno : -ENOMEM;
            close(fd);
            /* Do not leave a closed descriptor behind in the state. */
            s->fd = -1;
            return ret;
        }
    }
#endif
    return 0;
}
150

    
151
/* XXX: use host sector size if necessary with:
152
#ifdef DIOCGSECTORSIZE
153
        {
154
            unsigned int sectorsize = 512;
155
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
156
                sectorsize > bufsize)
157
                bufsize = sectorsize;
158
        }
159
#endif
160
#ifdef CONFIG_COCOA
161
        u_int32_t   blockSize = 512;
162
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
163
            bufsize = blockSize;
164
        }
165
#endif
166
*/
167

    
168
/*
169
 * offset and count are in bytes, but must be multiples of 512 for files
170
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
171
 *
172
 * This function may be called without alignment if the caller ensures
173
 * that O_DIRECT is not in effect.
174
 */
175
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
176
                     uint8_t *buf, int count)
177
{
178
    BDRVRawState *s = bs->opaque;
179
    int ret;
180

    
181
    ret = fd_open(bs);
182
    if (ret < 0)
183
        return ret;
184

    
185
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
186
        ++(s->lseek_err_cnt);
187
        if(s->lseek_err_cnt <= 10) {
188
            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
189
                              "] lseek failed : %d = %s\n",
190
                              s->fd, bs->filename, offset, buf, count,
191
                              bs->total_sectors, errno, strerror(errno));
192
        }
193
        return -1;
194
    }
195
    s->lseek_err_cnt=0;
196

    
197
    ret = read(s->fd, buf, count);
198
    if (ret == count)
199
        goto label__raw_read__success;
200

    
201
    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
202
                      "] read failed %d : %d = %s\n",
203
                      s->fd, bs->filename, offset, buf, count,
204
                      bs->total_sectors, ret, errno, strerror(errno));
205

    
206
    /* Try harder for CDrom. */
207
    if (bs->type == BDRV_TYPE_CDROM) {
208
        lseek(s->fd, offset, SEEK_SET);
209
        ret = read(s->fd, buf, count);
210
        if (ret == count)
211
            goto label__raw_read__success;
212
        lseek(s->fd, offset, SEEK_SET);
213
        ret = read(s->fd, buf, count);
214
        if (ret == count)
215
            goto label__raw_read__success;
216

    
217
        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
218
                          "] retry read failed %d : %d = %s\n",
219
                          s->fd, bs->filename, offset, buf, count,
220
                          bs->total_sectors, ret, errno, strerror(errno));
221
    }
222

    
223
label__raw_read__success:
224

    
225
    return ret;
226
}
227

    
228
/*
229
 * offset and count are in bytes, but must be multiples of 512 for files
230
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
231
 *
232
 * This function may be called without alignment if the caller ensures
233
 * that O_DIRECT is not in effect.
234
 */
235
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
236
                      const uint8_t *buf, int count)
237
{
238
    BDRVRawState *s = bs->opaque;
239
    int ret;
240

    
241
    ret = fd_open(bs);
242
    if (ret < 0)
243
        return ret;
244

    
245
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
246
        ++(s->lseek_err_cnt);
247
        if(s->lseek_err_cnt) {
248
            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
249
                              PRId64 "] lseek failed : %d = %s\n",
250
                              s->fd, bs->filename, offset, buf, count,
251
                              bs->total_sectors, errno, strerror(errno));
252
        }
253
        return -1;
254
    }
255
    s->lseek_err_cnt = 0;
256

    
257
    ret = write(s->fd, buf, count);
258
    if (ret == count)
259
        goto label__raw_write__success;
260

    
261
    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
262
                      "] write failed %d : %d = %s\n",
263
                      s->fd, bs->filename, offset, buf, count,
264
                      bs->total_sectors, ret, errno, strerror(errno));
265

    
266
label__raw_write__success:
267

    
268
    return ret;
269
}
270

    
271

    
272
#if defined(O_DIRECT)
273
/*
274
 * offset and count are in bytes and possibly not aligned. For files opened
275
 * with O_DIRECT, necessary alignments are ensured before calling
276
 * raw_pread_aligned to do the actual read.
277
 */
278
static int raw_pread(BlockDriverState *bs, int64_t offset,
279
                     uint8_t *buf, int count)
280
{
281
    BDRVRawState *s = bs->opaque;
282
    int size, ret, shift, sum;
283

    
284
    sum = 0;
285

    
286
    if (s->aligned_buf != NULL)  {
287

    
288
        if (offset & 0x1ff) {
289
            /* align offset on a 512 bytes boundary */
290

    
291
            shift = offset & 0x1ff;
292
            size = (shift + count + 0x1ff) & ~0x1ff;
293
            if (size > ALIGNED_BUFFER_SIZE)
294
                size = ALIGNED_BUFFER_SIZE;
295
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
296
            if (ret < 0)
297
                return ret;
298

    
299
            size = 512 - shift;
300
            if (size > count)
301
                size = count;
302
            memcpy(buf, s->aligned_buf + shift, size);
303

    
304
            buf += size;
305
            offset += size;
306
            count -= size;
307
            sum += size;
308

    
309
            if (count == 0)
310
                return sum;
311
        }
312
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
313

    
314
            /* read on aligned buffer */
315

    
316
            while (count) {
317

    
318
                size = (count + 0x1ff) & ~0x1ff;
319
                if (size > ALIGNED_BUFFER_SIZE)
320
                    size = ALIGNED_BUFFER_SIZE;
321

    
322
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
323
                if (ret < 0)
324
                    return ret;
325

    
326
                size = ret;
327
                if (size > count)
328
                    size = count;
329

    
330
                memcpy(buf, s->aligned_buf, size);
331

    
332
                buf += size;
333
                offset += size;
334
                count -= size;
335
                sum += size;
336
            }
337

    
338
            return sum;
339
        }
340
    }
341

    
342
    return raw_pread_aligned(bs, offset, buf, count) + sum;
343
}
344

    
345
/*
346
 * offset and count are in bytes and possibly not aligned. For files opened
347
 * with O_DIRECT, necessary alignments are ensured before calling
348
 * raw_pwrite_aligned to do the actual write.
349
 */
350
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
351
                      const uint8_t *buf, int count)
352
{
353
    BDRVRawState *s = bs->opaque;
354
    int size, ret, shift, sum;
355

    
356
    sum = 0;
357

    
358
    if (s->aligned_buf != NULL) {
359

    
360
        if (offset & 0x1ff) {
361
            /* align offset on a 512 bytes boundary */
362
            shift = offset & 0x1ff;
363
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
364
            if (ret < 0)
365
                return ret;
366

    
367
            size = 512 - shift;
368
            if (size > count)
369
                size = count;
370
            memcpy(s->aligned_buf + shift, buf, size);
371

    
372
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
373
            if (ret < 0)
374
                return ret;
375

    
376
            buf += size;
377
            offset += size;
378
            count -= size;
379
            sum += size;
380

    
381
            if (count == 0)
382
                return sum;
383
        }
384
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
385

    
386
            while ((size = (count & ~0x1ff)) != 0) {
387

    
388
                if (size > ALIGNED_BUFFER_SIZE)
389
                    size = ALIGNED_BUFFER_SIZE;
390

    
391
                memcpy(s->aligned_buf, buf, size);
392

    
393
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
394
                if (ret < 0)
395
                    return ret;
396

    
397
                buf += ret;
398
                offset += ret;
399
                count -= ret;
400
                sum += ret;
401
            }
402
            /* here, count < 512 because (count & ~0x1ff) == 0 */
403
            if (count) {
404
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
405
                if (ret < 0)
406
                    return ret;
407
                 memcpy(s->aligned_buf, buf, count);
408

    
409
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
410
                 if (ret < 0)
411
                     return ret;
412
                 if (count < ret)
413
                     ret = count;
414

    
415
                 sum += ret;
416
            }
417
            return sum;
418
        }
419
    }
420
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
421
}
422

    
423
#else
424
#define raw_pread raw_pread_aligned
425
#define raw_pwrite raw_pwrite_aligned
426
#endif
427

    
428

    
429
#ifdef CONFIG_AIO
430
/***********************************************************/
431
/* Unix AIO using POSIX AIO */
432

    
433
typedef struct RawAIOCB {
434
    BlockDriverAIOCB common;
435
    struct aiocb aiocb;
436
    struct RawAIOCB *next;
437
    int ret;
438
} RawAIOCB;
439

    
440
static int aio_sig_fd = -1;
441
static int aio_sig_num = SIGUSR2;
442
static RawAIOCB *first_aio; /* AIO issued */
443
static int aio_initialized = 0;
444

    
445
static void qemu_aio_poll(void *opaque)
446
{
447
    RawAIOCB *acb, **pacb;
448
    int ret;
449
    size_t offset;
450
    union {
451
        struct qemu_signalfd_siginfo siginfo;
452
        char buf[128];
453
    } sig;
454

    
455
    /* try to read from signalfd, don't freak out if we can't read anything */
456
    offset = 0;
457
    while (offset < 128) {
458
        ssize_t len;
459

    
460
        len = read(aio_sig_fd, sig.buf + offset, 128 - offset);
461
        if (len == -1 && errno == EINTR)
462
            continue;
463
        if (len == -1 && errno == EAGAIN) {
464
            /* there is no natural reason for this to happen,
465
             * so we'll spin hard until we get everything just
466
             * to be on the safe side. */
467
            if (offset > 0)
468
                continue;
469
        }
470

    
471
        offset += len;
472
    }
473

    
474
    for(;;) {
475
        pacb = &first_aio;
476
        for(;;) {
477
            acb = *pacb;
478
            if (!acb)
479
                goto the_end;
480
            ret = aio_error(&acb->aiocb);
481
            if (ret == ECANCELED) {
482
                /* remove the request */
483
                *pacb = acb->next;
484
                qemu_aio_release(acb);
485
            } else if (ret != EINPROGRESS) {
486
                /* end of aio */
487
                if (ret == 0) {
488
                    ret = aio_return(&acb->aiocb);
489
                    if (ret == acb->aiocb.aio_nbytes)
490
                        ret = 0;
491
                    else
492
                        ret = -EINVAL;
493
                } else {
494
                    ret = -ret;
495
                }
496
                /* remove the request */
497
                *pacb = acb->next;
498
                /* call the callback */
499
                acb->common.cb(acb->common.opaque, ret);
500
                qemu_aio_release(acb);
501
                break;
502
            } else {
503
                pacb = &acb->next;
504
            }
505
        }
506
    }
507
 the_end: ;
508
}
509

    
510
void qemu_aio_init(void)
511
{
512
    sigset_t mask;
513

    
514
    aio_initialized = 1;
515

    
516
    /* Make sure to block AIO signal */
517
    sigemptyset(&mask);
518
    sigaddset(&mask, aio_sig_num);
519
    sigprocmask(SIG_BLOCK, &mask, NULL);
520
    
521
    aio_sig_fd = qemu_signalfd(&mask);
522

    
523
    fcntl(aio_sig_fd, F_SETFL, O_NONBLOCK);
524

    
525
    qemu_set_fd_handler2(aio_sig_fd, NULL, qemu_aio_poll, NULL, NULL);
526

    
527
#if defined(__GLIBC__) && defined(__linux__)
528
    {
529
        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
530
           seems to fix the problem. */
531
        struct aioinit ai;
532
        memset(&ai, 0, sizeof(ai));
533
        ai.aio_threads = 1;
534
        ai.aio_num = 1;
535
        ai.aio_idle_time = 365 * 100000;
536
        aio_init(&ai);
537
    }
538
#endif
539
}
540

    
541
/* Wait for all IO requests to complete.  */
542
void qemu_aio_flush(void)
543
{
544
    qemu_aio_poll(NULL);
545
    while (first_aio) {
546
        qemu_aio_wait();
547
    }
548
}
549

    
550
void qemu_aio_wait(void)
551
{
552
    int ret;
553

    
554
    if (qemu_bh_poll())
555
        return;
556

    
557
    if (!first_aio)
558
        return;
559

    
560
    do {
561
        fd_set rdfds;
562

    
563
        FD_ZERO(&rdfds);
564
        FD_SET(aio_sig_fd, &rdfds);
565

    
566
        ret = select(aio_sig_fd + 1, &rdfds, NULL, NULL, NULL);
567
        if (ret == -1 && errno == EINTR)
568
            continue;
569
    } while (ret == 0);
570

    
571
    qemu_aio_poll(NULL);
572
}
573

    
574
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
575
        int64_t sector_num, uint8_t *buf, int nb_sectors,
576
        BlockDriverCompletionFunc *cb, void *opaque)
577
{
578
    BDRVRawState *s = bs->opaque;
579
    RawAIOCB *acb;
580

    
581
    if (fd_open(bs) < 0)
582
        return NULL;
583

    
584
    acb = qemu_aio_get(bs, cb, opaque);
585
    if (!acb)
586
        return NULL;
587
    acb->aiocb.aio_fildes = s->fd;
588
    acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
589
    acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
590
    acb->aiocb.aio_buf = buf;
591
    if (nb_sectors < 0)
592
        acb->aiocb.aio_nbytes = -nb_sectors;
593
    else
594
        acb->aiocb.aio_nbytes = nb_sectors * 512;
595
    acb->aiocb.aio_offset = sector_num * 512;
596
    acb->next = first_aio;
597
    first_aio = acb;
598
    return acb;
599
}
600

    
601
static void raw_aio_em_cb(void* opaque)
602
{
603
    RawAIOCB *acb = opaque;
604
    acb->common.cb(acb->common.opaque, acb->ret);
605
    qemu_aio_release(acb);
606
}
607

    
608
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
609
        int64_t sector_num, uint8_t *buf, int nb_sectors,
610
        BlockDriverCompletionFunc *cb, void *opaque)
611
{
612
    RawAIOCB *acb;
613

    
614
    /*
615
     * If O_DIRECT is used and the buffer is not aligned fall back
616
     * to synchronous IO.
617
     */
618
#if defined(O_DIRECT)
619
    BDRVRawState *s = bs->opaque;
620

    
621
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
622
        QEMUBH *bh;
623
        acb = qemu_aio_get(bs, cb, opaque);
624
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
625
        bh = qemu_bh_new(raw_aio_em_cb, acb);
626
        qemu_bh_schedule(bh);
627
        return &acb->common;
628
    }
629
#endif
630

    
631
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
632
    if (!acb)
633
        return NULL;
634
    if (aio_read(&acb->aiocb) < 0) {
635
        qemu_aio_release(acb);
636
        return NULL;
637
    }
638
    return &acb->common;
639
}
640

    
641
static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
642
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
643
        BlockDriverCompletionFunc *cb, void *opaque)
644
{
645
    RawAIOCB *acb;
646

    
647
    /*
648
     * If O_DIRECT is used and the buffer is not aligned fall back
649
     * to synchronous IO.
650
     */
651
#if defined(O_DIRECT)
652
    BDRVRawState *s = bs->opaque;
653

    
654
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
655
        QEMUBH *bh;
656
        acb = qemu_aio_get(bs, cb, opaque);
657
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
658
        bh = qemu_bh_new(raw_aio_em_cb, acb);
659
        qemu_bh_schedule(bh);
660
        return &acb->common;
661
    }
662
#endif
663

    
664
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
665
    if (!acb)
666
        return NULL;
667
    if (aio_write(&acb->aiocb) < 0) {
668
        qemu_aio_release(acb);
669
        return NULL;
670
    }
671
    return &acb->common;
672
}
673

    
674
/*
 * Cancel the request 'blockacb'. If the AIO layer cannot cancel it, wait
 * until it is no longer in progress. The request is then unlinked from
 * first_aio and released; its completion callback is not invoked here.
 */
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
{
    int ret;
    RawAIOCB *acb = (RawAIOCB *)blockacb;
    RawAIOCB **pacb;

    ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
    if (ret == AIO_NOTCANCELED) {
        /* fail safe: if the aio could not be canceled, we wait for
           it */
        while (aio_error(&acb->aiocb) == EINPROGRESS);
    }

    /* remove the callback from the queue; advance through the list
       itself (not through acb->next) so any position is found */
    for (pacb = &first_aio; *pacb != NULL; pacb = &(*pacb)->next) {
        if (*pacb == acb) {
            *pacb = acb->next;
            qemu_aio_release(acb);
            break;
        }
    }
}
700

    
701
# else /* CONFIG_AIO */
702

    
703
/* Built without CONFIG_AIO: initialisation does nothing. */
void qemu_aio_init(void)
{
}

/* Built without CONFIG_AIO: flushing does nothing. */
void qemu_aio_flush(void)
{
}

/* Built without CONFIG_AIO: waiting only runs the bottom-half poll; its
   result is ignored. */
void qemu_aio_wait(void)
{
    (void) qemu_bh_poll();
}
715

    
716
#endif /* CONFIG_AIO */
717

    
718
/*
 * Close the image descriptor if it is open and mark it closed; where
 * O_DIRECT exists, the bounce buffer is freed at the same time.
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;

    if (state->fd < 0)
        return;                 /* nothing open */

    close(state->fd);
    state->fd = -1;
#if defined(O_DIRECT)
    if (state->aligned_buf != NULL)
        qemu_free(state->aligned_buf);
#endif
}
730

    
731
/*
 * Resize the image to 'offset' bytes. Only FTYPE_FILE images are
 * supported. Returns 0, -ENOTSUP for other types, or the negated errno
 * from ftruncate().
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *state = bs->opaque;

    if (state->type != FTYPE_FILE)
        return -ENOTSUP;

    return (ftruncate(state->fd, offset) < 0) ? -errno : 0;
}
740

    
741
#ifdef __OpenBSD__
742
/*
 * OpenBSD: return the image length in bytes. For character and block
 * devices the size is sector size times the size of the partition named
 * by the device number, taken from the disklabel; other files report
 * st_size. Returns -1 when fstat() or the ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct stat sb;
    struct disklabel label;

    if (fstat(s->fd, &sb))
        return -1;

    if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
        return sb.st_size;

    if (ioctl(s->fd, DIOCGDINFO, &label))
        return -1;

    return (uint64_t)label.d_secsize *
        label.d_partitions[DISKPART(sb.st_rdev)].p_size;
}
760
#else /* !__OpenBSD__ */
761
/*
 * Return the image length in bytes, or a negative value on error.
 *
 * The descriptor is (re)opened through fd_open() first. Depending on the
 * host, the size then comes from a media-size ioctl (BSD character
 * devices, Solaris) or, as the general fallback, from seeking to the end
 * of the file.
 */
static int64_t  raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
#ifdef _BSD
    struct stat sb;
#endif
#ifdef __sun__
    struct dk_minfo minfo;
    int rv;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

    /* Read the descriptor only now: fd_open() may have just reopened it,
       so a copy taken earlier could be stale (or -1). */
    fd = s->fd;

#ifdef _BSD
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#endif
#ifdef CONFIG_COCOA
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
    } else
#endif
#ifdef __sun__
    /*
     * use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
    if ( rv != -1 ) {
        size = minfo.dki_lbsize * minfo.dki_capacity;
    } else /* there are reports that lseek on some devices
              fails, but irc discussion said that contingency
              on contingency was overkill */
#endif
    {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
807
#endif
808

    
809
static int raw_create(const char *filename, int64_t total_size,
810
                      const char *backing_file, int flags)
811
{
812
    int fd;
813

    
814
    if (flags || backing_file)
815
        return -ENOTSUP;
816

    
817
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
818
              0644);
819
    if (fd < 0)
820
        return -EIO;
821
    ftruncate(fd, total_size * 512);
822
    close(fd);
823
    return 0;
824
}
825

    
826
/* Flush the image descriptor with fsync(); the result is not reported. */
static void raw_flush(BlockDriverState *bs)
{
    BDRVRawState *state = bs->opaque;

    (void) fsync(state->fd);
}
831

    
832
BlockDriver bdrv_raw = {
833
    "raw",
834
    sizeof(BDRVRawState),
835
    NULL, /* no probe for protocols */
836
    raw_open,
837
    NULL,
838
    NULL,
839
    raw_close,
840
    raw_create,
841
    raw_flush,
842

    
843
#ifdef CONFIG_AIO
844
    .bdrv_aio_read = raw_aio_read,
845
    .bdrv_aio_write = raw_aio_write,
846
    .bdrv_aio_cancel = raw_aio_cancel,
847
    .aiocb_size = sizeof(RawAIOCB),
848
#endif
849
    .bdrv_pread = raw_pread,
850
    .bdrv_pwrite = raw_pwrite,
851
    .bdrv_truncate = raw_truncate,
852
    .bdrv_getlength = raw_getlength,
853
};
854

    
855
/***********************************************/
856
/* host device */
857

    
858
#ifdef CONFIG_COCOA
859
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
860
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
861

    
862
/*
 * Ask IOKit for all services of class kIOCDMediaClass whose ejectable
 * property is true and return an iterator over them in *mediaIterator.
 * Failures of the individual steps are only printed; the value returned
 * is the result of IOServiceGetMatchingServices().
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    kern_return_t status;
    mach_port_t port;
    CFMutableDictionaryRef matching;

    status = IOMasterPort( MACH_PORT_NULL, &port );
    if ( status != KERN_SUCCESS ) {
        printf( "IOMasterPort returned %d\n", status );
    }

    matching = IOServiceMatching( kIOCDMediaClass );
    if ( matching != NULL ) {
        CFDictionarySetValue( matching, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
    } else {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
    }

    status = IOServiceGetMatchingServices( port, matching, mediaIterator );
    if ( status != KERN_SUCCESS ) {
        printf( "IOServiceGetMatchingServices returned %d\n", status );
    }

    return status;
}
887

    
888
/*
 * Build the raw device path (_PATH_DEV "r" <BSD name>) of the next media
 * object from 'mediaIterator' into 'bsdPath' (capacity 'maxPathSize').
 *
 * bsdPath is set to the empty string first. Returns KERN_SUCCESS only if
 * the BSD name could be appended, KERN_FAILURE otherwise.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    kern_return_t result = KERN_FAILURE;
    io_object_t media;
    CFTypeRef bsdName;
    size_t prefixLen;

    *bsdPath = '\0';

    media = IOIteratorNext( mediaIterator );
    if ( !media )
        return result;

    bsdName = IORegistryEntryCreateCFProperty( media, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
    if ( bsdName ) {
        strcpy( bsdPath, _PATH_DEV );
        strcat( bsdPath, "r" );
        prefixLen = strlen( bsdPath );
        /* the name is written right after the "/dev/r"-style prefix */
        if ( CFStringGetCString( bsdName, bsdPath + prefixLen, maxPathSize - prefixLen, kCFStringEncodingASCII ) ) {
            result = KERN_SUCCESS;
        }
        CFRelease( bsdName );
    }
    IOObjectRelease( media );

    return result;
}
913

    
914
#endif
915

    
916
/*
 * Open a host device as a raw image.
 *
 * On Cocoa hosts a name starting with "/dev/cdrom" is replaced by the raw
 * device path of the first ejectable CD found through IOKit. On Linux the
 * name prefix selects CD-ROM, floppy or SCSI-generic handling; the floppy
 * descriptor is closed again at once so fd_open() can reopen it on
 * demand.
 *
 * Returns 0 on success or a negative errno value (EROFS is reported as
 * EACCES).
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;
#ifdef CONFIG_COCOA
    /* Function scope on purpose: 'filename' may point at this buffer
       until the open() call further down. */
    char bsdPath[ MAXPATHLEN ];
#endif

#ifdef CONFIG_COCOA
    if (strstart(filename, "/dev/cdrom", NULL)) {
        kern_return_t kernResult;
        io_iterator_t mediaIterator = 0;

        bsdPath[ 0 ] = '\0';
        kernResult = FindEjectableCDMedia( &mediaIterator );
        if ( kernResult == KERN_SUCCESS )
            kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );

        if ( bsdPath[ 0 ] != '\0' ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (fd < 0) {
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                close(fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator )
            IOObjectRelease( mediaIterator );
    }
#endif
    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT)
        open_flags |= O_DIRECT;
#endif

    s->type = FTYPE_FILE;
#if defined(__linux__)
    if (strstart(filename, "/dev/cd", NULL)) {
        /* open will not fail even if no CD is inserted */
        open_flags |= O_NONBLOCK;
        s->type = FTYPE_CD;
    } else if (strstart(filename, "/dev/fd", NULL)) {
        s->type = FTYPE_FD;
        /* remembered without O_NONBLOCK for later reopens */
        s->fd_open_flags = open_flags;
        /* open will not fail even if no floppy is inserted */
        open_flags |= O_NONBLOCK;
    } else if (strstart(filename, "/dev/sg", NULL)) {
        bs->sg = 1;
    }
#endif
    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
#if defined(__linux__)
    /* close fd so that we can reopen it as needed */
    if (s->type == FTYPE_FD) {
        close(s->fd);
        s->fd = -1;
        s->fd_media_changed = 1;
    }
#endif
    return 0;
}
992

    
993
#if defined(__linux__)
994

    
995
/* Note: we do not have a reliable method to detect if the floppy is
996
   present. The current method is to try to open the floppy at every
997
   I/O and to keep it opened during a few hundreds of ms. */
998
static int fd_open(BlockDriverState *bs)
999
{
1000
    BDRVRawState *s = bs->opaque;
1001
    int last_media_present;
1002

    
1003
    if (s->type != FTYPE_FD)
1004
        return 0;
1005
    last_media_present = (s->fd >= 0);
1006
    if (s->fd >= 0 &&
1007
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1008
        close(s->fd);
1009
        s->fd = -1;
1010
#ifdef DEBUG_FLOPPY
1011
        printf("Floppy closed\n");
1012
#endif
1013
    }
1014
    if (s->fd < 0) {
1015
        if (s->fd_got_error &&
1016
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1017
#ifdef DEBUG_FLOPPY
1018
            printf("No floppy (open delayed)\n");
1019
#endif
1020
            return -EIO;
1021
        }
1022
        s->fd = open(bs->filename, s->fd_open_flags);
1023
        if (s->fd < 0) {
1024
            s->fd_error_time = qemu_get_clock(rt_clock);
1025
            s->fd_got_error = 1;
1026
            if (last_media_present)
1027
                s->fd_media_changed = 1;
1028
#ifdef DEBUG_FLOPPY
1029
            printf("No floppy\n");
1030
#endif
1031
            return -EIO;
1032
        }
1033
#ifdef DEBUG_FLOPPY
1034
        printf("Floppy opened\n");
1035
#endif
1036
    }
1037
    if (!last_media_present)
1038
        s->fd_media_changed = 1;
1039
    s->fd_open_time = qemu_get_clock(rt_clock);
1040
    s->fd_got_error = 0;
1041
    return 0;
1042
}
1043

    
1044
/*
 * Report whether a medium is present in the host device.
 *
 * CD-ROM: asks the drive via CDROM_DRIVE_STATUS and returns 1 only when it
 * answers CDS_DISC_OK.  Floppy: returns 1 when fd_open() succeeds.  Every
 * other device type is treated as always inserted.
 */
static int raw_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_CD) {
        int status = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

        return (status == CDS_DISC_OK) ? 1 : 0;
    }

    if (s->type == FTYPE_FD) {
        int opened = fd_open(bs);

        return (opened >= 0);
    }

    return 1;
}
1064

    
1065
/* currently only used by fdc.c, but a CD version would be good too */
1066
static int raw_media_changed(BlockDriverState *bs)
1067
{
1068
    BDRVRawState *s = bs->opaque;
1069

    
1070
    switch(s->type) {
1071
    case FTYPE_FD:
1072
        {
1073
            int ret;
1074
            /* XXX: we do not have a true media changed indication. It
1075
               does not work if the floppy is changed without trying
1076
               to read it */
1077
            fd_open(bs);
1078
            ret = s->fd_media_changed;
1079
            s->fd_media_changed = 0;
1080
#ifdef DEBUG_FLOPPY
1081
            printf("Floppy changed=%d\n", ret);
1082
#endif
1083
            return ret;
1084
        }
1085
    default:
1086
        return -ENOTSUP;
1087
    }
1088
}
1089

    
1090
/*
 * Eject the medium from, or close the tray of, a removable host device.
 *
 * bs:         block device whose opaque state is a BDRVRawState
 * eject_flag: CD-ROM only - non-zero opens the tray (CDROMEJECT), zero
 *             closes it (CDROMCLOSETRAY).  Ignored for floppies, which
 *             are always ejected.
 *
 * The operation is best effort: a failing ioctl is only reported on
 * stderr and the function still returns 0.  -ENOTSUP is returned for
 * device types that are neither CD-ROM nor floppy.
 */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    BDRVRawState *s = bs->opaque;

    switch(s->type) {
    case FTYPE_CD:
        if (eject_flag) {
            if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
                perror("CDROMEJECT");
        } else {
            /* Name the ioctl that actually failed; this used to be
               reported as CDROMEJECT as well. */
            if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
                perror("CDROMCLOSETRAY");
        }
        break;
    case FTYPE_FD:
        {
            int fd;

            /* Release the cached descriptor before touching the drive. */
            if (s->fd >= 0) {
                close(s->fd);
                s->fd = -1;
            }
            /* Use a temporary non-blocking descriptor just for the eject
               request; if the open fails nothing is reported. */
            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
            if (fd >= 0) {
                if (ioctl(fd, FDEJECT, 0) < 0)
                    perror("FDEJECT");
                close(fd);
            }
        }
        break;
    default:
        return -ENOTSUP;
    }
    return 0;
}
1124

    
1125
/*
 * Lock or unlock the door of a host CD-ROM drive.
 *
 * `locked` is passed straight through as the CDROM_LOCKDOOR argument.
 * Returns 0 for CD-ROM devices, whether or not the ioctl succeeded, and
 * -ENOTSUP for every other device type.
 */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    BDRVRawState *s = bs->opaque;

    if (s->type != FTYPE_CD) {
        return -ENOTSUP;
    }

    /* Failure is deliberately ignored: an error can happen if the
       distribution automatically mounts the CD-ROM. */
    (void)ioctl(s->fd, CDROM_LOCKDOOR, locked);
    return 0;
}
1142

    
1143
/*
 * Forward an ioctl request unchanged to the underlying host descriptor.
 *
 * Returns whatever ioctl() returns; no translation of the result or of
 * errno is performed here.
 */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *state = bs->opaque;
    int rc;

    rc = ioctl(state->fd, req, buf);
    return rc;
}
1149
#else
1150

    
1151
/* Non-Linux build: no floppy reopen logic is compiled in, so this always
   reports success (0). */
static int fd_open(BlockDriverState *bs)
{
    (void)bs;

    return 0;
}
1155

    
1156
/* Non-Linux build: media presence is not probed; the device is always
   reported as inserted (1). */
static int raw_is_inserted(BlockDriverState *bs)
{
    (void)bs;

    return 1;
}
1160

    
1161
/* Non-Linux build: media change detection is unavailable; always returns
   -ENOTSUP. */
static int raw_media_changed(BlockDriverState *bs)
{
    (void)bs;

    return -ENOTSUP;
}
1165

    
1166
/* Non-Linux build: ejecting is unavailable; always returns -ENOTSUP. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    (void)bs;
    (void)eject_flag;

    return -ENOTSUP;
}
1170

    
1171
/* Non-Linux build: door locking is unavailable; always returns -ENOTSUP. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    (void)bs;
    (void)locked;

    return -ENOTSUP;
}
1175

    
1176
/* Non-Linux build: ioctl pass-through is unavailable; always returns
   -ENOTSUP. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    (void)bs;
    (void)req;
    (void)buf;

    return -ENOTSUP;
}
1180
#endif /* !linux */
1181

    
1182
BlockDriver bdrv_host_device = {
1183
    "host_device",
1184
    sizeof(BDRVRawState),
1185
    NULL, /* no probe for protocols */
1186
    hdev_open,
1187
    NULL,
1188
    NULL,
1189
    raw_close,
1190
    NULL,
1191
    raw_flush,
1192

    
1193
#ifdef CONFIG_AIO
1194
    .bdrv_aio_read = raw_aio_read,
1195
    .bdrv_aio_write = raw_aio_write,
1196
    .bdrv_aio_cancel = raw_aio_cancel,
1197
    .aiocb_size = sizeof(RawAIOCB),
1198
#endif
1199
    .bdrv_pread = raw_pread,
1200
    .bdrv_pwrite = raw_pwrite,
1201
    .bdrv_getlength = raw_getlength,
1202

    
1203
    /* removable device support */
1204
    .bdrv_is_inserted = raw_is_inserted,
1205
    .bdrv_media_changed = raw_media_changed,
1206
    .bdrv_eject = raw_eject,
1207
    .bdrv_set_locked = raw_set_locked,
1208
    /* generic scsi device */
1209
    .bdrv_ioctl = raw_ioctl,
1210
};