Statistics
| Branch: | Revision:

root / block-raw-posix.c @ 0d0ab49a

History | View | Annotate | Download (32.2 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "qemu-char.h"
27
#include "block_int.h"
28
#include "compatfd.h"
29
#include <assert.h>
30
#ifdef CONFIG_AIO
31
#include <aio.h>
32
#endif
33

    
34
#ifdef CONFIG_COCOA
35
#include <paths.h>
36
#include <sys/param.h>
37
#include <IOKit/IOKitLib.h>
38
#include <IOKit/IOBSD.h>
39
#include <IOKit/storage/IOMediaBSDClient.h>
40
#include <IOKit/storage/IOMedia.h>
41
#include <IOKit/storage/IOCDMedia.h>
42
//#include <IOKit/storage/IOCDTypes.h>
43
#include <CoreFoundation/CoreFoundation.h>
44
#endif
45

    
46
#ifdef __sun__
47
#define _POSIX_PTHREAD_SEMANTICS 1
48
#include <signal.h>
49
#include <sys/dkio.h>
50
#endif
51
#ifdef __linux__
52
#include <sys/ioctl.h>
53
#include <linux/cdrom.h>
54
#include <linux/fd.h>
55
#endif
56
#ifdef __FreeBSD__
57
#include <signal.h>
58
#include <sys/disk.h>
59
#endif
60

    
61
#ifdef __OpenBSD__
62
#include <sys/ioctl.h>
63
#include <sys/disklabel.h>
64
#include <sys/dkio.h>
65
#endif
66

    
67
//#define DEBUG_FLOPPY
68

    
69
//#define DEBUG_BLOCK
70
#if defined(DEBUG_BLOCK)
71
#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0)        \
72
    { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
73
#else
74
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
75
#endif
76

    
77
#define FTYPE_FILE   0
78
#define FTYPE_CD     1
79
#define FTYPE_FD     2
80

    
81
#define ALIGNED_BUFFER_SIZE (32 * 512)
82

    
83
/* If the floppy descriptor has not been accessed for this long (in ms),
   it is closed and reopened to find out whether the disk was changed. */
85
#define FD_OPEN_TIMEOUT 1000
86

    
87
/* POSIX AIO does not allow multiple outstanding requests on a single file
 * descriptor.  We work around this with a pool of dup()'d file
 * descriptors, one per in-flight request. */
90
#define RAW_FD_POOL_SIZE        64
91

    
92
typedef struct BDRVRawState {
93
    int fd;
94
    int type;
95
    unsigned int lseek_err_cnt;
96
    int fd_pool[RAW_FD_POOL_SIZE];
97
#if defined(__linux__)
98
    /* linux floppy specific */
99
    int fd_open_flags;
100
    int64_t fd_open_time;
101
    int64_t fd_error_time;
102
    int fd_got_error;
103
    int fd_media_changed;
104
#endif
105
#if defined(O_DIRECT)
106
    uint8_t* aligned_buf;
107
#endif
108
} BDRVRawState;
109

    
110
static int posix_aio_init(void);
111

    
112
static int fd_open(BlockDriverState *bs);
113

    
114
/*
 * Open a regular file as a raw image.
 *
 * flags carries BDRV_O_* bits: the access mode selects O_RDWR or O_RDONLY
 * (the latter also marks bs read-only), BDRV_O_CREAT adds O_CREAT|O_TRUNC
 * and, where the host has O_DIRECT, BDRV_O_DIRECT adds O_DIRECT and
 * allocates the 512-byte aligned bounce buffer.
 *
 * Returns 0 on success or a negative errno value; EROFS is reported as
 * -EACCES.
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int open_flags = O_BINARY;
    int fd, slot, err;

    posix_aio_init();

    s->lseek_err_cnt = 0;

    if ((flags & BDRV_O_ACCESS) != O_RDWR) {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    } else {
        open_flags |= O_RDWR;
    }
    if (flags & BDRV_O_CREAT) {
        open_flags |= O_CREAT | O_TRUNC;
    }
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT) {
        open_flags |= O_DIRECT;
    }
#endif

    s->type = FTYPE_FILE;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        err = -errno;
        return (err == -EROFS) ? -EACCES : err;
    }
    s->fd = fd;

    /* every pool slot starts out free */
    for (slot = 0; slot < RAW_FD_POOL_SIZE; slot++) {
        s->fd_pool[slot] = -1;
    }
#if defined(O_DIRECT)
    s->aligned_buf = NULL;
    if (flags & BDRV_O_DIRECT) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            err = -errno;
            close(fd);
            return err;
        }
    }
#endif
    return 0;
}
163

    
164
/* XXX: use host sector size if necessary with:
165
#ifdef DIOCGSECTORSIZE
166
        {
167
            unsigned int sectorsize = 512;
168
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
169
                sectorsize > bufsize)
170
                bufsize = sectorsize;
171
        }
172
#endif
173
#ifdef CONFIG_COCOA
174
        u_int32_t   blockSize = 512;
175
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
176
            bufsize = blockSize;
177
        }
178
#endif
179
*/
180

    
181
/*
182
 * offset and count are in bytes, but must be multiples of 512 for files
183
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
184
 *
185
 * This function may be called without alignment if the caller ensures
186
 * that O_DIRECT is not in effect.
187
 */
188
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
189
                     uint8_t *buf, int count)
190
{
191
    BDRVRawState *s = bs->opaque;
192
    int ret;
193

    
194
    ret = fd_open(bs);
195
    if (ret < 0)
196
        return ret;
197

    
198
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
199
        ++(s->lseek_err_cnt);
200
        if(s->lseek_err_cnt <= 10) {
201
            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
202
                              "] lseek failed : %d = %s\n",
203
                              s->fd, bs->filename, offset, buf, count,
204
                              bs->total_sectors, errno, strerror(errno));
205
        }
206
        return -1;
207
    }
208
    s->lseek_err_cnt=0;
209

    
210
    ret = read(s->fd, buf, count);
211
    if (ret == count)
212
        goto label__raw_read__success;
213

    
214
    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
215
                      "] read failed %d : %d = %s\n",
216
                      s->fd, bs->filename, offset, buf, count,
217
                      bs->total_sectors, ret, errno, strerror(errno));
218

    
219
    /* Try harder for CDrom. */
220
    if (bs->type == BDRV_TYPE_CDROM) {
221
        lseek(s->fd, offset, SEEK_SET);
222
        ret = read(s->fd, buf, count);
223
        if (ret == count)
224
            goto label__raw_read__success;
225
        lseek(s->fd, offset, SEEK_SET);
226
        ret = read(s->fd, buf, count);
227
        if (ret == count)
228
            goto label__raw_read__success;
229

    
230
        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
231
                          "] retry read failed %d : %d = %s\n",
232
                          s->fd, bs->filename, offset, buf, count,
233
                          bs->total_sectors, ret, errno, strerror(errno));
234
    }
235

    
236
label__raw_read__success:
237

    
238
    return ret;
239
}
240

    
241
/*
242
 * offset and count are in bytes, but must be multiples of 512 for files
243
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
244
 *
245
 * This function may be called without alignment if the caller ensures
246
 * that O_DIRECT is not in effect.
247
 */
248
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
249
                      const uint8_t *buf, int count)
250
{
251
    BDRVRawState *s = bs->opaque;
252
    int ret;
253

    
254
    ret = fd_open(bs);
255
    if (ret < 0)
256
        return ret;
257

    
258
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
259
        ++(s->lseek_err_cnt);
260
        if(s->lseek_err_cnt) {
261
            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
262
                              PRId64 "] lseek failed : %d = %s\n",
263
                              s->fd, bs->filename, offset, buf, count,
264
                              bs->total_sectors, errno, strerror(errno));
265
        }
266
        return -1;
267
    }
268
    s->lseek_err_cnt = 0;
269

    
270
    ret = write(s->fd, buf, count);
271
    if (ret == count)
272
        goto label__raw_write__success;
273

    
274
    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
275
                      "] write failed %d : %d = %s\n",
276
                      s->fd, bs->filename, offset, buf, count,
277
                      bs->total_sectors, ret, errno, strerror(errno));
278

    
279
label__raw_write__success:
280

    
281
    return ret;
282
}
283

    
284

    
285
#if defined(O_DIRECT)
286
/*
287
 * offset and count are in bytes and possibly not aligned. For files opened
288
 * with O_DIRECT, necessary alignments are ensured before calling
289
 * raw_pread_aligned to do the actual read.
290
 */
291
static int raw_pread(BlockDriverState *bs, int64_t offset,
292
                     uint8_t *buf, int count)
293
{
294
    BDRVRawState *s = bs->opaque;
295
    int size, ret, shift, sum;
296

    
297
    sum = 0;
298

    
299
    if (s->aligned_buf != NULL)  {
300

    
301
        if (offset & 0x1ff) {
302
            /* align offset on a 512 bytes boundary */
303

    
304
            shift = offset & 0x1ff;
305
            size = (shift + count + 0x1ff) & ~0x1ff;
306
            if (size > ALIGNED_BUFFER_SIZE)
307
                size = ALIGNED_BUFFER_SIZE;
308
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
309
            if (ret < 0)
310
                return ret;
311

    
312
            size = 512 - shift;
313
            if (size > count)
314
                size = count;
315
            memcpy(buf, s->aligned_buf + shift, size);
316

    
317
            buf += size;
318
            offset += size;
319
            count -= size;
320
            sum += size;
321

    
322
            if (count == 0)
323
                return sum;
324
        }
325
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
326

    
327
            /* read on aligned buffer */
328

    
329
            while (count) {
330

    
331
                size = (count + 0x1ff) & ~0x1ff;
332
                if (size > ALIGNED_BUFFER_SIZE)
333
                    size = ALIGNED_BUFFER_SIZE;
334

    
335
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
336
                if (ret < 0)
337
                    return ret;
338

    
339
                size = ret;
340
                if (size > count)
341
                    size = count;
342

    
343
                memcpy(buf, s->aligned_buf, size);
344

    
345
                buf += size;
346
                offset += size;
347
                count -= size;
348
                sum += size;
349
            }
350

    
351
            return sum;
352
        }
353
    }
354

    
355
    return raw_pread_aligned(bs, offset, buf, count) + sum;
356
}
357

    
358
/*
359
 * offset and count are in bytes and possibly not aligned. For files opened
360
 * with O_DIRECT, necessary alignments are ensured before calling
361
 * raw_pwrite_aligned to do the actual write.
362
 */
363
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
364
                      const uint8_t *buf, int count)
365
{
366
    BDRVRawState *s = bs->opaque;
367
    int size, ret, shift, sum;
368

    
369
    sum = 0;
370

    
371
    if (s->aligned_buf != NULL) {
372

    
373
        if (offset & 0x1ff) {
374
            /* align offset on a 512 bytes boundary */
375
            shift = offset & 0x1ff;
376
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
377
            if (ret < 0)
378
                return ret;
379

    
380
            size = 512 - shift;
381
            if (size > count)
382
                size = count;
383
            memcpy(s->aligned_buf + shift, buf, size);
384

    
385
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
386
            if (ret < 0)
387
                return ret;
388

    
389
            buf += size;
390
            offset += size;
391
            count -= size;
392
            sum += size;
393

    
394
            if (count == 0)
395
                return sum;
396
        }
397
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
398

    
399
            while ((size = (count & ~0x1ff)) != 0) {
400

    
401
                if (size > ALIGNED_BUFFER_SIZE)
402
                    size = ALIGNED_BUFFER_SIZE;
403

    
404
                memcpy(s->aligned_buf, buf, size);
405

    
406
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
407
                if (ret < 0)
408
                    return ret;
409

    
410
                buf += ret;
411
                offset += ret;
412
                count -= ret;
413
                sum += ret;
414
            }
415
            /* here, count < 512 because (count & ~0x1ff) == 0 */
416
            if (count) {
417
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
418
                if (ret < 0)
419
                    return ret;
420
                 memcpy(s->aligned_buf, buf, count);
421

    
422
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
423
                 if (ret < 0)
424
                     return ret;
425
                 if (count < ret)
426
                     ret = count;
427

    
428
                 sum += ret;
429
            }
430
            return sum;
431
        }
432
    }
433
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
434
}
435

    
436
#else
437
#define raw_pread raw_pread_aligned
438
#define raw_pwrite raw_pwrite_aligned
439
#endif
440

    
441

    
442
#ifdef CONFIG_AIO
443
/***********************************************************/
444
/* Unix AIO using POSIX AIO */
445

    
446
typedef struct RawAIOCB {
447
    BlockDriverAIOCB common;
448
    int fd;
449
    struct aiocb aiocb;
450
    struct RawAIOCB *next;
451
    int ret;
452
} RawAIOCB;
453

    
454
typedef struct PosixAioState
455
{
456
    int fd;
457
    RawAIOCB *first_aio;
458
} PosixAioState;
459

    
460
/*
 * Pick a descriptor for a new AIO request: the first free pool slot that
 * can be filled with a dup() of the main descriptor.  If no slot is free
 * or every dup() fails, the main descriptor itself is returned.
 */
static int raw_fd_pool_get(BDRVRawState *s)
{
    int *slot;
    int *const end = s->fd_pool + RAW_FD_POOL_SIZE;

    for (slot = s->fd_pool; slot != end; slot++) {
        if (*slot == -1) {
            /* free slot: try to dup file descriptor */
            *slot = dup(s->fd);
            if (*slot != -1) {
                return *slot;
            }
        }
    }

    /* we couldn't dup the file descriptor so just use the main one */
    return s->fd;
}
478

    
479
/*
 * Give back the descriptor used by acb: every pool slot holding acb->fd
 * is closed and marked free.  A request that ran on the main descriptor
 * matches no slot and leaves the pool untouched.
 */
static void raw_fd_pool_put(RawAIOCB *acb)
{
    BDRVRawState *s = acb->common.bs->opaque;
    int *slot;
    int *const end = s->fd_pool + RAW_FD_POOL_SIZE;

    for (slot = s->fd_pool; slot != end; slot++) {
        if (*slot != acb->fd) {
            continue;
        }
        close(*slot);
        *slot = -1;
    }
}
491

    
492
static void posix_aio_read(void *opaque)
493
{
494
    PosixAioState *s = opaque;
495
    RawAIOCB *acb, **pacb;
496
    int ret;
497
    size_t offset;
498
    union {
499
        struct qemu_signalfd_siginfo siginfo;
500
        char buf[128];
501
    } sig;
502

    
503
    /* try to read from signalfd, don't freak out if we can't read anything */
504
    offset = 0;
505
    while (offset < 128) {
506
        ssize_t len;
507

    
508
        len = read(s->fd, sig.buf + offset, 128 - offset);
509
        if (len == -1 && errno == EINTR)
510
            continue;
511
        if (len == -1 && errno == EAGAIN) {
512
            /* there is no natural reason for this to happen,
513
             * so we'll spin hard until we get everything just
514
             * to be on the safe side. */
515
            if (offset > 0)
516
                continue;
517
        }
518

    
519
        offset += len;
520
    }
521

    
522
    for(;;) {
523
        pacb = &s->first_aio;
524
        for(;;) {
525
            acb = *pacb;
526
            if (!acb)
527
                goto the_end;
528
            ret = aio_error(&acb->aiocb);
529
            if (ret == ECANCELED) {
530
                /* remove the request */
531
                *pacb = acb->next;
532
                raw_fd_pool_put(acb);
533
                qemu_aio_release(acb);
534
            } else if (ret != EINPROGRESS) {
535
                /* end of aio */
536
                if (ret == 0) {
537
                    ret = aio_return(&acb->aiocb);
538
                    if (ret == acb->aiocb.aio_nbytes)
539
                        ret = 0;
540
                    else
541
                        ret = -EINVAL;
542
                } else {
543
                    ret = -ret;
544
                }
545
                /* remove the request */
546
                *pacb = acb->next;
547
                /* call the callback */
548
                acb->common.cb(acb->common.opaque, ret);
549
                raw_fd_pool_put(acb);
550
                qemu_aio_release(acb);
551
                break;
552
            } else {
553
                pacb = &acb->next;
554
            }
555
        }
556
    }
557
 the_end: ;
558
}
559

    
560
static int posix_aio_flush(void *opaque)
561
{
562
    PosixAioState *s = opaque;
563
    return !!s->first_aio;
564
}
565

    
566
static PosixAioState *posix_aio_state;
567

    
568
static int posix_aio_init(void)
569
{
570
    sigset_t mask;
571
    PosixAioState *s;
572
    struct aioinit ai;
573
  
574
    if (posix_aio_state)
575
        return 0;
576

    
577
    s = qemu_malloc(sizeof(PosixAioState));
578
    if (s == NULL)
579
        return -ENOMEM;
580

    
581
    /* Make sure to block AIO signal */
582
    sigemptyset(&mask);
583
    sigaddset(&mask, SIGUSR2);
584
    sigprocmask(SIG_BLOCK, &mask, NULL);
585
    
586
    s->first_aio = NULL;
587
    s->fd = qemu_signalfd(&mask);
588

    
589
    fcntl(s->fd, F_SETFL, O_NONBLOCK);
590

    
591
    qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
592

    
593
    memset(&ai, 0, sizeof(ai));
594
#if !defined(__linux__) || (defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 4))
595
    ai.aio_threads = 5;
596
    ai.aio_num = 1;
597
#else
598
    /* XXX: aio thread exit seems to hang on RedHat 9 and this init
599
       seems to fix the problem. */
600
    ai.aio_threads = 1;
601
    ai.aio_num = 1;
602
    ai.aio_idle_time = 365 * 100000;
603
#endif
604
    aio_init(&ai);
605
    posix_aio_state = s;
606

    
607
    return 0;
608
}
609

    
610
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
611
        int64_t sector_num, uint8_t *buf, int nb_sectors,
612
        BlockDriverCompletionFunc *cb, void *opaque)
613
{
614
    BDRVRawState *s = bs->opaque;
615
    RawAIOCB *acb;
616

    
617
    if (fd_open(bs) < 0)
618
        return NULL;
619

    
620
    acb = qemu_aio_get(bs, cb, opaque);
621
    if (!acb)
622
        return NULL;
623
    acb->fd = raw_fd_pool_get(s);
624
    acb->aiocb.aio_fildes = acb->fd;
625
    acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
626
    acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
627
    acb->aiocb.aio_buf = buf;
628
    if (nb_sectors < 0)
629
        acb->aiocb.aio_nbytes = -nb_sectors;
630
    else
631
        acb->aiocb.aio_nbytes = nb_sectors * 512;
632
    acb->aiocb.aio_offset = sector_num * 512;
633
    acb->next = posix_aio_state->first_aio;
634
    posix_aio_state->first_aio = acb;
635
    return acb;
636
}
637

    
638
static void raw_aio_em_cb(void* opaque)
639
{
640
    RawAIOCB *acb = opaque;
641
    acb->common.cb(acb->common.opaque, acb->ret);
642
    qemu_aio_release(acb);
643
}
644

    
645
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
646
        int64_t sector_num, uint8_t *buf, int nb_sectors,
647
        BlockDriverCompletionFunc *cb, void *opaque)
648
{
649
    RawAIOCB *acb;
650

    
651
    /*
652
     * If O_DIRECT is used and the buffer is not aligned fall back
653
     * to synchronous IO.
654
     */
655
#if defined(O_DIRECT)
656
    BDRVRawState *s = bs->opaque;
657

    
658
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
659
        QEMUBH *bh;
660
        acb = qemu_aio_get(bs, cb, opaque);
661
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
662
        bh = qemu_bh_new(raw_aio_em_cb, acb);
663
        qemu_bh_schedule(bh);
664
        return &acb->common;
665
    }
666
#endif
667

    
668
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
669
    if (!acb)
670
        return NULL;
671
    if (aio_read(&acb->aiocb) < 0) {
672
        qemu_aio_release(acb);
673
        return NULL;
674
    }
675
    return &acb->common;
676
}
677

    
678
static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
679
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
680
        BlockDriverCompletionFunc *cb, void *opaque)
681
{
682
    RawAIOCB *acb;
683

    
684
    /*
685
     * If O_DIRECT is used and the buffer is not aligned fall back
686
     * to synchronous IO.
687
     */
688
#if defined(O_DIRECT)
689
    BDRVRawState *s = bs->opaque;
690

    
691
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
692
        QEMUBH *bh;
693
        acb = qemu_aio_get(bs, cb, opaque);
694
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
695
        bh = qemu_bh_new(raw_aio_em_cb, acb);
696
        qemu_bh_schedule(bh);
697
        return &acb->common;
698
    }
699
#endif
700

    
701
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
702
    if (!acb)
703
        return NULL;
704
    if (aio_write(&acb->aiocb) < 0) {
705
        qemu_aio_release(acb);
706
        return NULL;
707
    }
708
    return &acb->common;
709
}
710

    
711
/*
 * Cancel an outstanding request.  If the AIO layer cannot cancel it, wait
 * until it is no longer in progress.  The request is then unlinked from
 * the pending list, its pooled descriptor is returned and the control
 * block is released; the completion callback is not invoked.
 */
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
{
    int ret;
    RawAIOCB *acb = (RawAIOCB *)blockacb;
    RawAIOCB **pacb;

    ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
    if (ret == AIO_NOTCANCELED) {
        /* fail safe: if the aio could not be canceled, we wait for
           it */
        while (aio_error(&acb->aiocb) == EINPROGRESS);
    }

    /* remove the callback from the queue */
    pacb = &posix_aio_state->first_aio;
    while (*pacb != NULL) {
        if (*pacb == acb) {
            *pacb = acb->next;
            raw_fd_pool_put(acb);
            qemu_aio_release(acb);
            break;
        }
        /* Step to the link field of the element just examined.
           Advancing with &acb->next would leave the list after one step
           and only ever find acb when it is the head. */
        pacb = &(*pacb)->next;
    }
}
738

    
739
#else /* CONFIG_AIO */
740
/* Without CONFIG_AIO there is nothing to set up; always reports success. */
static int posix_aio_init(void)
{
    return 0;
}
743
#endif /* CONFIG_AIO */
744

    
745
/* Close every descriptor currently held in the dup() pool and mark all slots free. */
static void raw_close_fd_pool(BDRVRawState *s)
{
    int *slot;
    int *const end = s->fd_pool + RAW_FD_POOL_SIZE;

    for (slot = s->fd_pool; slot != end; slot++) {
        if (*slot == -1) {
            continue;   /* slot already free */
        }
        close(*slot);
        *slot = -1;
    }
}
756

    
757
/*
 * Release everything the driver holds for bs: the main descriptor, the
 * O_DIRECT bounce buffer and all pooled descriptors.
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
    }
#if defined(O_DIRECT)
    /* The buffer's lifetime is independent of the descriptor (the floppy
       path closes s->fd while the device stays open), so free it even
       when the fd is already gone, and clear the pointer so nothing can
       reuse or free it again. */
    if (s->aligned_buf != NULL) {
        qemu_free(s->aligned_buf);
        s->aligned_buf = NULL;
    }
#endif
    raw_close_fd_pool(s);
}
770

    
771
/*
 * Resize the image to offset bytes.  Only plain files can be resized.
 *
 * Returns 0 on success, -ENOTSUP for CD/floppy device types, or the
 * negated errno from ftruncate().
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_FILE) {
        return (ftruncate(s->fd, offset) < 0) ? -errno : 0;
    }
    return -ENOTSUP;
}
780

    
781
#ifdef __OpenBSD__
782
/*
 * OpenBSD: size of the image in bytes.  Character and block devices are
 * sized from the disklabel (sector size times the size of the partition
 * selected by the device number); anything else uses st_size.
 *
 * Returns -1 if fstat() or the DIOCGDINFO ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct disklabel label;
    struct stat info;
    int fd = s->fd;

    if (fstat(fd, &info)) {
        return -1;
    }
    if (!S_ISCHR(info.st_mode) && !S_ISBLK(info.st_mode)) {
        return info.st_size;
    }

    if (ioctl(fd, DIOCGDINFO, &label)) {
        return -1;
    }
    return (uint64_t)label.d_secsize *
        label.d_partitions[DISKPART(info.st_rdev)].p_size;
}
800
#else /* !__OpenBSD__ */
801
/*
 * Size of the image in bytes.
 *
 * BSD character devices are sized with DIOCGMEDIASIZE where available
 * (falling back to LONG_LONG_MAX on Cocoa builds, or to seeking to the
 * end elsewhere); Solaris uses the DKIOCGMEDIAINFO ioctl; everything
 * else seeks to the end of the file.
 *
 * Returns the negative value from fd_open() if the device cannot be
 * opened; otherwise the size, or whatever lseek() returned on failure.
 */
static int64_t  raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
#ifdef _BSD
    struct stat sb;
#endif
#ifdef __sun__
    struct dk_minfo minfo;
    int rv;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

    /* Fetch the descriptor only now: fd_open() may close and reopen the
       device, so a copy of s->fd taken before the call can be stale. */
    fd = s->fd;

#ifdef _BSD
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#endif
#ifdef CONFIG_COCOA
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
    } else
#endif
#ifdef __sun__
    /*
     * use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
    if ( rv != -1 ) {
        size = minfo.dki_lbsize * minfo.dki_capacity;
    } else /* there are reports that lseek on some devices
              fails, but irc discussion said that contingency
              on contingency was overkill */
#endif
    {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
847
#endif
848

    
849
static int raw_create(const char *filename, int64_t total_size,
850
                      const char *backing_file, int flags)
851
{
852
    int fd;
853

    
854
    if (flags || backing_file)
855
        return -ENOTSUP;
856

    
857
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
858
              0644);
859
    if (fd < 0)
860
        return -EIO;
861
    ftruncate(fd, total_size * 512);
862
    close(fd);
863
    return 0;
864
}
865

    
866
/* Flush the main descriptor with fsync(); the result is not reported. */
static void raw_flush(BlockDriverState *bs)
{
    const BDRVRawState *state = bs->opaque;

    fsync(state->fd);
}
871

    
872
BlockDriver bdrv_raw = {
873
    "raw",
874
    sizeof(BDRVRawState),
875
    NULL, /* no probe for protocols */
876
    raw_open,
877
    NULL,
878
    NULL,
879
    raw_close,
880
    raw_create,
881
    raw_flush,
882

    
883
#ifdef CONFIG_AIO
884
    .bdrv_aio_read = raw_aio_read,
885
    .bdrv_aio_write = raw_aio_write,
886
    .bdrv_aio_cancel = raw_aio_cancel,
887
    .aiocb_size = sizeof(RawAIOCB),
888
#endif
889
    .bdrv_pread = raw_pread,
890
    .bdrv_pwrite = raw_pwrite,
891
    .bdrv_truncate = raw_truncate,
892
    .bdrv_getlength = raw_getlength,
893
};
894

    
895
/***********************************************/
896
/* host device */
897

    
898
#ifdef CONFIG_COCOA
899
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
900
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
901

    
902
/*
 * Build an iterator over all ejectable CD media known to the I/O
 * registry.
 *
 * On success *mediaIterator receives the iterator (the caller releases
 * it).  On every failure path *mediaIterator is set to 0, so the
 * caller's "if (mediaIterator)" release check never reads an
 * uninitialised value.
 *
 * Returns KERN_SUCCESS or the failing call's error code; KERN_FAILURE
 * when the matching dictionary cannot be created.
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    kern_return_t       kernResult;
    mach_port_t     masterPort;
    CFMutableDictionaryRef  classesToMatch;

    *mediaIterator = 0;

    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( KERN_SUCCESS != kernResult ) {
        printf( "IOMasterPort returned %d\n", kernResult );
        /* masterPort is not valid; do not go on and use it */
        return kernResult;
    }

    classesToMatch = IOServiceMatching( kIOCDMediaClass );
    if ( classesToMatch == NULL ) {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
        /* nothing to match against */
        return KERN_FAILURE;
    }
    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );

    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
    if ( KERN_SUCCESS != kernResult )
    {
        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
        *mediaIterator = 0;
    }

    return kernResult;
}
927

    
928
/*
 * Take the next media object from mediaIterator and write the path of
 * its raw device node (_PATH_DEV followed by "r" and the BSD name) into
 * bsdPath, a buffer of maxPathSize bytes.
 *
 * bsdPath is set to the empty string first.  Returns KERN_SUCCESS only
 * when the BSD name was converted; otherwise KERN_FAILURE.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    kern_return_t status = KERN_FAILURE;
    io_object_t media;
    CFTypeRef bsdName;
    size_t prefixLen;

    *bsdPath = '\0';

    media = IOIteratorNext( mediaIterator );
    if ( !media ) {
        return status;
    }

    bsdName = IORegistryEntryCreateCFProperty( media, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
    if ( bsdName ) {
        /* device-directory prefix plus "r", then the BSD name */
        strcpy( bsdPath, _PATH_DEV );
        strcat( bsdPath, "r" );
        prefixLen = strlen( bsdPath );
        if ( CFStringGetCString( bsdName, bsdPath + prefixLen, maxPathSize - prefixLen, kCFStringEncodingASCII ) ) {
            status = KERN_SUCCESS;
        }
        CFRelease( bsdName );
    }
    IOObjectRelease( media );

    return status;
}
953

    
954
#endif
955

    
956
/*
 * Open a host device as a raw image.
 *
 * On Cocoa builds a filename starting with "/dev/cdrom" is replaced by
 * the raw device of the first ejectable CD found.  On Linux, "/dev/cd*"
 * and "/dev/fd*" are opened with O_NONBLOCK so the open succeeds without
 * media, "/dev/sg*" marks the device as SCSI generic, and a floppy
 * descriptor is closed again straight away so that it is reopened on
 * demand.
 *
 * As in raw_open(), BDRV_O_DIRECT allocates the 512-byte aligned bounce
 * buffer on hosts that have O_DIRECT.
 *
 * Returns 0 on success or a negative errno value; EROFS is reported as
 * -EACCES.
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret, i;

    posix_aio_init();

#ifdef CONFIG_COCOA
    if (strstart(filename, "/dev/cdrom", NULL)) {
        kern_return_t kernResult;
        io_iterator_t mediaIterator;
        char bsdPath[ MAXPATHLEN ];
        int probe_fd;   /* distinct name: must not shadow the outer fd */

        kernResult = FindEjectableCDMedia( &mediaIterator );
        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );

        if ( bsdPath[ 0 ] != '\0' ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            probe_fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (probe_fd < 0) {
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                close(probe_fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator )
            IOObjectRelease( mediaIterator );
    }
#endif
    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT)
        open_flags |= O_DIRECT;
#endif

    s->type = FTYPE_FILE;
#if defined(__linux__)
    if (strstart(filename, "/dev/cd", NULL)) {
        /* open will not fail even if no CD is inserted */
        open_flags |= O_NONBLOCK;
        s->type = FTYPE_CD;
    } else if (strstart(filename, "/dev/fd", NULL)) {
        s->type = FTYPE_FD;
        /* flags for later reopens are saved before O_NONBLOCK is added */
        s->fd_open_flags = open_flags;
        /* open will not fail even if no floppy is inserted */
        open_flags |= O_NONBLOCK;
    } else if (strstart(filename, "/dev/sg", NULL)) {
        bs->sg = 1;
    }
#endif
    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    for (i = 0; i < RAW_FD_POOL_SIZE; i++)
        s->fd_pool[i] = -1;
#if defined(O_DIRECT)
    /* Same bounce-buffer setup as raw_open(): without it the unaligned
       read/write paths, which key off s->aligned_buf, are never taken
       for host devices opened with O_DIRECT. */
    s->aligned_buf = NULL;
    if (flags & BDRV_O_DIRECT) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;
            close(fd);
            return ret;
        }
    }
#endif
#if defined(__linux__)
    /* close fd so that we can reopen it as needed */
    if (s->type == FTYPE_FD) {
        close(s->fd);
        s->fd = -1;
        s->fd_media_changed = 1;
    }
#endif
    return 0;
}
1036

    
1037
#if defined(__linux__)
1038
/* Note: we do not have a reliable method to detect if the floppy is
1039
   present. The current method is to try to open the floppy at every
1040
   I/O and to keep it opened during a few hundreds of ms. */
1041
static int fd_open(BlockDriverState *bs)
1042
{
1043
    BDRVRawState *s = bs->opaque;
1044
    int last_media_present;
1045

    
1046
    if (s->type != FTYPE_FD)
1047
        return 0;
1048
    last_media_present = (s->fd >= 0);
1049
    if (s->fd >= 0 &&
1050
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1051
        close(s->fd);
1052
        s->fd = -1;
1053
        raw_close_fd_pool(s);
1054
#ifdef DEBUG_FLOPPY
1055
        printf("Floppy closed\n");
1056
#endif
1057
    }
1058
    if (s->fd < 0) {
1059
        if (s->fd_got_error &&
1060
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1061
#ifdef DEBUG_FLOPPY
1062
            printf("No floppy (open delayed)\n");
1063
#endif
1064
            return -EIO;
1065
        }
1066
        s->fd = open(bs->filename, s->fd_open_flags);
1067
        if (s->fd < 0) {
1068
            s->fd_error_time = qemu_get_clock(rt_clock);
1069
            s->fd_got_error = 1;
1070
            if (last_media_present)
1071
                s->fd_media_changed = 1;
1072
#ifdef DEBUG_FLOPPY
1073
            printf("No floppy\n");
1074
#endif
1075
            return -EIO;
1076
        }
1077
#ifdef DEBUG_FLOPPY
1078
        printf("Floppy opened\n");
1079
#endif
1080
    }
1081
    if (!last_media_present)
1082
        s->fd_media_changed = 1;
1083
    s->fd_open_time = qemu_get_clock(rt_clock);
1084
    s->fd_got_error = 0;
1085
    return 0;
1086
}
1087

    
1088
/*
 * Report whether a medium is present in the host device.
 *
 * CD-ROM: asks the drive through CDROM_DRIVE_STATUS and returns 1 only for
 * CDS_DISC_OK.  Floppy: returns 1 when fd_open() succeeds.  Any other device
 * type is always reported as inserted.
 */
static int raw_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_CD) {
        int status = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

        return (status == CDS_DISC_OK) ? 1 : 0;
    }

    if (s->type == FTYPE_FD) {
        int rc = fd_open(bs);

        return (rc >= 0);
    }

    return 1;
}
1108

    
1109
/* currently only used by fdc.c, but a CD version would be good too */
1110
static int raw_media_changed(BlockDriverState *bs)
1111
{
1112
    BDRVRawState *s = bs->opaque;
1113

    
1114
    switch(s->type) {
1115
    case FTYPE_FD:
1116
        {
1117
            int ret;
1118
            /* XXX: we do not have a true media changed indication. It
1119
               does not work if the floppy is changed without trying
1120
               to read it */
1121
            fd_open(bs);
1122
            ret = s->fd_media_changed;
1123
            s->fd_media_changed = 0;
1124
#ifdef DEBUG_FLOPPY
1125
            printf("Floppy changed=%d\n", ret);
1126
#endif
1127
            return ret;
1128
        }
1129
    default:
1130
        return -ENOTSUP;
1131
    }
1132
}
1133

    
1134
/*
 * Eject or load the medium of a removable host device.
 *
 * bs         - block driver state of the host device
 * eject_flag - CD-ROM only: non-zero opens the tray (CDROMEJECT), zero
 *              closes it (CDROMCLOSETRAY).  The floppy path ignores it and
 *              always issues FDEJECT.
 *
 * ioctl failures are reported on stderr with perror() and do not change the
 * return value.  Returns 0 for CD-ROM and floppy devices, -ENOTSUP for any
 * other device type.
 */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    BDRVRawState *s = bs->opaque;

    switch(s->type) {
    case FTYPE_CD:
        if (eject_flag) {
            if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
                perror("CDROMEJECT");
        } else {
            /* label the diagnostic with the request that actually failed */
            if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
                perror("CDROMCLOSETRAY");
        }
        break;
    case FTYPE_FD:
        {
            int fd;
            /* release the cached descriptors before talking to the drive */
            if (s->fd >= 0) {
                close(s->fd);
                s->fd = -1;
                raw_close_fd_pool(s);
            }
            /* O_NONBLOCK: the open is expected to work without a floppy */
            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
            if (fd >= 0) {
                if (ioctl(fd, FDEJECT, 0) < 0)
                    perror("FDEJECT");
                close(fd);
            }
        }
        break;
    default:
        return -ENOTSUP;
    }
    return 0;
}
1169

    
1170
/*
 * Lock or unlock the door of a host CD-ROM drive.
 *
 * 'locked' is passed unchanged as the CDROM_LOCKDOOR argument.  Returns 0
 * for CD-ROM devices regardless of the ioctl outcome, and -ENOTSUP for every
 * other device type.
 */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    BDRVRawState *s = bs->opaque;

    if (s->type != FTYPE_CD) {
        return -ENOTSUP;
    }

    /* Note: an error can happen if the distribution automatically mounts
       the CD-ROM, so the result is deliberately ignored. */
    (void) ioctl(s->fd, CDROM_LOCKDOOR, locked);
    return 0;
}
1187

    
1188
/*
 * Forward an ioctl request to the host device descriptor.
 *
 * The value returned by ioctl() is handed back as is; it is not converted
 * to a negative errno value.
 */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *state = bs->opaque;
    int host_fd = state->fd;

    return ioctl(host_fd, req, buf);
}
1194
#else
1195

    
1196
/* Non-Linux stub: nothing is reopened here, so this always reports
   success. */
static int fd_open(BlockDriverState *bs)
{
    (void) bs;

    return 0;
}
1200

    
1201
/* Non-Linux stub: the medium is always reported as present. */
static int raw_is_inserted(BlockDriverState *bs)
{
    (void) bs;

    return 1;
}
1205

    
1206
/* Non-Linux stub: media change detection is not supported. */
static int raw_media_changed(BlockDriverState *bs)
{
    (void) bs;

    return -ENOTSUP;
}
1210

    
1211
/* Non-Linux stub: ejecting the medium is not supported. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    (void) bs;
    (void) eject_flag;

    return -ENOTSUP;
}
1215

    
1216
/* Non-Linux stub: locking the drive door is not supported. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    (void) bs;
    (void) locked;

    return -ENOTSUP;
}
1220

    
1221
/* Non-Linux stub: device ioctl pass-through is not supported. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    (void) bs;
    (void) req;
    (void) buf;

    return -ENOTSUP;
}
1225
#endif /* !linux */
1226

    
1227
BlockDriver bdrv_host_device = {
1228
    "host_device",
1229
    sizeof(BDRVRawState),
1230
    NULL, /* no probe for protocols */
1231
    hdev_open,
1232
    NULL,
1233
    NULL,
1234
    raw_close,
1235
    NULL,
1236
    raw_flush,
1237

    
1238
#ifdef CONFIG_AIO
1239
    .bdrv_aio_read = raw_aio_read,
1240
    .bdrv_aio_write = raw_aio_write,
1241
    .bdrv_aio_cancel = raw_aio_cancel,
1242
    .aiocb_size = sizeof(RawAIOCB),
1243
#endif
1244
    .bdrv_pread = raw_pread,
1245
    .bdrv_pwrite = raw_pwrite,
1246
    .bdrv_getlength = raw_getlength,
1247

    
1248
    /* removable device support */
1249
    .bdrv_is_inserted = raw_is_inserted,
1250
    .bdrv_media_changed = raw_media_changed,
1251
    .bdrv_eject = raw_eject,
1252
    .bdrv_set_locked = raw_set_locked,
1253
    /* generic scsi device */
1254
    .bdrv_ioctl = raw_ioctl,
1255
};