Statistics
| Branch: | Revision:

root / block-raw-posix.c @ 04eeb8b6

History | View | Annotate | Download (32.3 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "qemu-char.h"
27
#include "block_int.h"
28
#include <assert.h>
29
#ifdef CONFIG_AIO
30
#include "posix-aio-compat.h"
31
#endif
32

    
33
#ifdef CONFIG_COCOA
34
#include <paths.h>
35
#include <sys/param.h>
36
#include <IOKit/IOKitLib.h>
37
#include <IOKit/IOBSD.h>
38
#include <IOKit/storage/IOMediaBSDClient.h>
39
#include <IOKit/storage/IOMedia.h>
40
#include <IOKit/storage/IOCDMedia.h>
41
//#include <IOKit/storage/IOCDTypes.h>
42
#include <CoreFoundation/CoreFoundation.h>
43
#endif
44

    
45
#ifdef __sun__
46
#define _POSIX_PTHREAD_SEMANTICS 1
47
#include <signal.h>
48
#include <sys/dkio.h>
49
#endif
50
#ifdef __linux__
51
#include <sys/ioctl.h>
52
#include <linux/cdrom.h>
53
#include <linux/fd.h>
54
#endif
55
#ifdef __FreeBSD__
56
#include <signal.h>
57
#include <sys/disk.h>
58
#endif
59

    
60
#ifdef __OpenBSD__
61
#include <sys/ioctl.h>
62
#include <sys/disklabel.h>
63
#include <sys/dkio.h>
64
#endif
65

    
66
#ifdef __DragonFly__
67
#include <sys/ioctl.h>
68
#include <sys/diskslice.h>
69
#endif
70

    
71
//#define DEBUG_FLOPPY
72

    
73
//#define DEBUG_BLOCK
74
#if defined(DEBUG_BLOCK)
75
#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (qemu_log_enabled())        \
76
    { qemu_log(formatCstr, ##args); qemu_log_flush(); } } while (0)
77
#else
78
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
79
#endif
80

    
81
/* OS X does not have O_DSYNC */
82
#ifndef O_DSYNC
83
#define O_DSYNC O_SYNC
84
#endif
85

    
86
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
87
#ifndef O_DIRECT
88
#define O_DIRECT O_DSYNC
89
#endif
90

    
91
#define FTYPE_FILE   0
92
#define FTYPE_CD     1
93
#define FTYPE_FD     2
94

    
95
#define ALIGNED_BUFFER_SIZE (32 * 512)
96

    
97
/* if the FD is not accessed during that time (in ms), we try to
98
   reopen it to see if the disk has been changed */
99
#define FD_OPEN_TIMEOUT 1000
100

    
101
/* Per-device state used by the raw file and host device drivers. */
typedef struct BDRVRawState {
    int fd;                      /* host descriptor; set to -1 once closed */
    int type;                    /* one of the FTYPE_* constants */
    unsigned int lseek_err_cnt;  /* consecutive lseek() failures seen */
#if defined(__linux__)
    /* linux floppy specific */
    int fd_open_flags;           /* open() flags used when re-opening */
    int64_t fd_open_time;        /* clock value of the last successful use */
    int64_t fd_error_time;       /* clock value of the last failed open */
    int fd_got_error;            /* non-zero if the last open attempt failed */
    int fd_media_changed;        /* non-zero if the medium may have changed */
#endif
    uint8_t *aligned_buf;        /* bounce buffer, allocated for BDRV_O_NOCACHE */
} BDRVRawState;
115

    
116
static int posix_aio_init(void);
117

    
118
static int fd_open(BlockDriverState *bs);
119

    
120
/*
 * Open a regular file as a raw image.
 *
 * bs:       block device whose opaque area holds the BDRVRawState
 * filename: path handed to open()
 * flags:    BDRV_O_* flags (access mode, BDRV_O_CREAT, cache mode)
 *
 * Returns 0 on success or a negative errno value.  -EROFS is reported
 * as -EACCES.  On every failure path the state is left with fd == -1
 * and aligned_buf == NULL, so it never refers to a closed descriptor.
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;

    posix_aio_init();

    s->lseek_err_cnt = 0;
    s->fd = -1;
    s->aligned_buf = NULL;

    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    if (flags & BDRV_O_CREAT)
        open_flags |= O_CREAT | O_TRUNC;

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
    else if (!(flags & BDRV_O_CACHE_WB))
        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }

    if ((flags & BDRV_O_NOCACHE)) {
        /* bounce buffer used to satisfy the O_DIRECT alignment rules */
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;   /* saved before close() can overwrite errno */
            close(fd);
            return ret;
        }
    }

    /* publish the descriptor only once everything succeeded */
    s->fd = fd;
    return 0;
}
167

    
168
/* XXX: use host sector size if necessary with:
169
#ifdef DIOCGSECTORSIZE
170
        {
171
            unsigned int sectorsize = 512;
172
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
173
                sectorsize > bufsize)
174
                bufsize = sectorsize;
175
        }
176
#endif
177
#ifdef CONFIG_COCOA
178
        u_int32_t   blockSize = 512;
179
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
180
            bufsize = blockSize;
181
        }
182
#endif
183
*/
184

    
185
/*
186
 * offset and count are in bytes, but must be multiples of 512 for files
187
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
188
 *
189
 * This function may be called without alignment if the caller ensures
190
 * that O_DIRECT is not in effect.
191
 */
192
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
193
                     uint8_t *buf, int count)
194
{
195
    BDRVRawState *s = bs->opaque;
196
    int ret;
197

    
198
    ret = fd_open(bs);
199
    if (ret < 0)
200
        return ret;
201

    
202
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
203
        ++(s->lseek_err_cnt);
204
        if(s->lseek_err_cnt <= 10) {
205
            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
206
                              "] lseek failed : %d = %s\n",
207
                              s->fd, bs->filename, offset, buf, count,
208
                              bs->total_sectors, errno, strerror(errno));
209
        }
210
        return -1;
211
    }
212
    s->lseek_err_cnt=0;
213

    
214
    ret = read(s->fd, buf, count);
215
    if (ret == count)
216
        goto label__raw_read__success;
217

    
218
    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
219
                      "] read failed %d : %d = %s\n",
220
                      s->fd, bs->filename, offset, buf, count,
221
                      bs->total_sectors, ret, errno, strerror(errno));
222

    
223
    /* Try harder for CDrom. */
224
    if (bs->type == BDRV_TYPE_CDROM) {
225
        lseek(s->fd, offset, SEEK_SET);
226
        ret = read(s->fd, buf, count);
227
        if (ret == count)
228
            goto label__raw_read__success;
229
        lseek(s->fd, offset, SEEK_SET);
230
        ret = read(s->fd, buf, count);
231
        if (ret == count)
232
            goto label__raw_read__success;
233

    
234
        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
235
                          "] retry read failed %d : %d = %s\n",
236
                          s->fd, bs->filename, offset, buf, count,
237
                          bs->total_sectors, ret, errno, strerror(errno));
238
    }
239

    
240
label__raw_read__success:
241

    
242
    return ret;
243
}
244

    
245
/*
246
 * offset and count are in bytes, but must be multiples of 512 for files
247
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
248
 *
249
 * This function may be called without alignment if the caller ensures
250
 * that O_DIRECT is not in effect.
251
 */
252
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
253
                      const uint8_t *buf, int count)
254
{
255
    BDRVRawState *s = bs->opaque;
256
    int ret;
257

    
258
    ret = fd_open(bs);
259
    if (ret < 0)
260
        return -errno;
261

    
262
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
263
        ++(s->lseek_err_cnt);
264
        if(s->lseek_err_cnt) {
265
            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
266
                              PRId64 "] lseek failed : %d = %s\n",
267
                              s->fd, bs->filename, offset, buf, count,
268
                              bs->total_sectors, errno, strerror(errno));
269
        }
270
        return -EIO;
271
    }
272
    s->lseek_err_cnt = 0;
273

    
274
    ret = write(s->fd, buf, count);
275
    if (ret == count)
276
        goto label__raw_write__success;
277

    
278
    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
279
                      "] write failed %d : %d = %s\n",
280
                      s->fd, bs->filename, offset, buf, count,
281
                      bs->total_sectors, ret, errno, strerror(errno));
282

    
283
label__raw_write__success:
284

    
285
    return  (ret < 0) ? -errno : ret;
286
}
287

    
288

    
289
/*
290
 * offset and count are in bytes and possibly not aligned. For files opened
291
 * with O_DIRECT, necessary alignments are ensured before calling
292
 * raw_pread_aligned to do the actual read.
293
 */
294
static int raw_pread(BlockDriverState *bs, int64_t offset,
295
                     uint8_t *buf, int count)
296
{
297
    BDRVRawState *s = bs->opaque;
298
    int size, ret, shift, sum;
299

    
300
    sum = 0;
301

    
302
    if (s->aligned_buf != NULL)  {
303

    
304
        if (offset & 0x1ff) {
305
            /* align offset on a 512 bytes boundary */
306

    
307
            shift = offset & 0x1ff;
308
            size = (shift + count + 0x1ff) & ~0x1ff;
309
            if (size > ALIGNED_BUFFER_SIZE)
310
                size = ALIGNED_BUFFER_SIZE;
311
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
312
            if (ret < 0)
313
                return ret;
314

    
315
            size = 512 - shift;
316
            if (size > count)
317
                size = count;
318
            memcpy(buf, s->aligned_buf + shift, size);
319

    
320
            buf += size;
321
            offset += size;
322
            count -= size;
323
            sum += size;
324

    
325
            if (count == 0)
326
                return sum;
327
        }
328
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
329

    
330
            /* read on aligned buffer */
331

    
332
            while (count) {
333

    
334
                size = (count + 0x1ff) & ~0x1ff;
335
                if (size > ALIGNED_BUFFER_SIZE)
336
                    size = ALIGNED_BUFFER_SIZE;
337

    
338
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
339
                if (ret < 0)
340
                    return ret;
341

    
342
                size = ret;
343
                if (size > count)
344
                    size = count;
345

    
346
                memcpy(buf, s->aligned_buf, size);
347

    
348
                buf += size;
349
                offset += size;
350
                count -= size;
351
                sum += size;
352
            }
353

    
354
            return sum;
355
        }
356
    }
357

    
358
    return raw_pread_aligned(bs, offset, buf, count) + sum;
359
}
360

    
361
/*
362
 * offset and count are in bytes and possibly not aligned. For files opened
363
 * with O_DIRECT, necessary alignments are ensured before calling
364
 * raw_pwrite_aligned to do the actual write.
365
 */
366
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
367
                      const uint8_t *buf, int count)
368
{
369
    BDRVRawState *s = bs->opaque;
370
    int size, ret, shift, sum;
371

    
372
    sum = 0;
373

    
374
    if (s->aligned_buf != NULL) {
375

    
376
        if (offset & 0x1ff) {
377
            /* align offset on a 512 bytes boundary */
378
            shift = offset & 0x1ff;
379
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
380
            if (ret < 0)
381
                return ret;
382

    
383
            size = 512 - shift;
384
            if (size > count)
385
                size = count;
386
            memcpy(s->aligned_buf + shift, buf, size);
387

    
388
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
389
            if (ret < 0)
390
                return ret;
391

    
392
            buf += size;
393
            offset += size;
394
            count -= size;
395
            sum += size;
396

    
397
            if (count == 0)
398
                return sum;
399
        }
400
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
401

    
402
            while ((size = (count & ~0x1ff)) != 0) {
403

    
404
                if (size > ALIGNED_BUFFER_SIZE)
405
                    size = ALIGNED_BUFFER_SIZE;
406

    
407
                memcpy(s->aligned_buf, buf, size);
408

    
409
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
410
                if (ret < 0)
411
                    return ret;
412

    
413
                buf += ret;
414
                offset += ret;
415
                count -= ret;
416
                sum += ret;
417
            }
418
            /* here, count < 512 because (count & ~0x1ff) == 0 */
419
            if (count) {
420
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
421
                if (ret < 0)
422
                    return ret;
423
                 memcpy(s->aligned_buf, buf, count);
424

    
425
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
426
                 if (ret < 0)
427
                     return ret;
428
                 if (count < ret)
429
                     ret = count;
430

    
431
                 sum += ret;
432
            }
433
            return sum;
434
        }
435
    }
436
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
437
}
438

    
439
#ifdef CONFIG_AIO
440
/***********************************************************/
441
/* Unix AIO using POSIX AIO */
442

    
443
typedef struct RawAIOCB {
444
    BlockDriverAIOCB common;
445
    struct qemu_paiocb aiocb;
446
    struct RawAIOCB *next;
447
    int ret;
448
} RawAIOCB;
449

    
450
typedef struct PosixAioState
451
{
452
    int rfd, wfd;
453
    RawAIOCB *first_aio;
454
} PosixAioState;
455

    
456
static void posix_aio_read(void *opaque)
457
{
458
    PosixAioState *s = opaque;
459
    RawAIOCB *acb, **pacb;
460
    int ret;
461
    ssize_t len;
462

    
463
    /* read all bytes from signal pipe */
464
    for (;;) {
465
        char bytes[16];
466

    
467
        len = read(s->rfd, bytes, sizeof(bytes));
468
        if (len == -1 && errno == EINTR)
469
            continue; /* try again */
470
        if (len == sizeof(bytes))
471
            continue; /* more to read */
472
        break;
473
    }
474

    
475
    for(;;) {
476
        pacb = &s->first_aio;
477
        for(;;) {
478
            acb = *pacb;
479
            if (!acb)
480
                goto the_end;
481
            ret = qemu_paio_error(&acb->aiocb);
482
            if (ret == ECANCELED) {
483
                /* remove the request */
484
                *pacb = acb->next;
485
                qemu_aio_release(acb);
486
            } else if (ret != EINPROGRESS) {
487
                /* end of aio */
488
                if (ret == 0) {
489
                    ret = qemu_paio_return(&acb->aiocb);
490
                    if (ret == acb->aiocb.aio_nbytes)
491
                        ret = 0;
492
                    else
493
                        ret = -EINVAL;
494
                } else {
495
                    ret = -ret;
496
                }
497
                /* remove the request */
498
                *pacb = acb->next;
499
                /* call the callback */
500
                acb->common.cb(acb->common.opaque, ret);
501
                qemu_aio_release(acb);
502
                break;
503
            } else {
504
                pacb = &acb->next;
505
            }
506
        }
507
    }
508
 the_end: ;
509
}
510

    
511
static int posix_aio_flush(void *opaque)
512
{
513
    PosixAioState *s = opaque;
514
    return !!s->first_aio;
515
}
516

    
517
static PosixAioState *posix_aio_state;
518

    
519
static void aio_signal_handler(int signum)
520
{
521
    if (posix_aio_state) {
522
        char byte = 0;
523

    
524
        write(posix_aio_state->wfd, &byte, sizeof(byte));
525
    }
526

    
527
    qemu_service_io();
528
}
529

    
530
static int posix_aio_init(void)
531
{
532
    struct sigaction act;
533
    PosixAioState *s;
534
    int fds[2];
535
    struct qemu_paioinit ai;
536
  
537
    if (posix_aio_state)
538
        return 0;
539

    
540
    s = qemu_malloc(sizeof(PosixAioState));
541

    
542
    sigfillset(&act.sa_mask);
543
    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
544
    act.sa_handler = aio_signal_handler;
545
    sigaction(SIGUSR2, &act, NULL);
546

    
547
    s->first_aio = NULL;
548
    if (pipe(fds) == -1) {
549
        fprintf(stderr, "failed to create pipe\n");
550
        return -errno;
551
    }
552

    
553
    s->rfd = fds[0];
554
    s->wfd = fds[1];
555

    
556
    fcntl(s->rfd, F_SETFL, O_NONBLOCK);
557
    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
558

    
559
    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
560

    
561
    memset(&ai, 0, sizeof(ai));
562
    ai.aio_threads = 64;
563
    ai.aio_num = 64;
564
    qemu_paio_init(&ai);
565

    
566
    posix_aio_state = s;
567

    
568
    return 0;
569
}
570

    
571
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
572
        int64_t sector_num, uint8_t *buf, int nb_sectors,
573
        BlockDriverCompletionFunc *cb, void *opaque)
574
{
575
    BDRVRawState *s = bs->opaque;
576
    RawAIOCB *acb;
577

    
578
    if (fd_open(bs) < 0)
579
        return NULL;
580

    
581
    acb = qemu_aio_get(bs, cb, opaque);
582
    if (!acb)
583
        return NULL;
584
    acb->aiocb.aio_fildes = s->fd;
585
    acb->aiocb.ev_signo = SIGUSR2;
586
    acb->aiocb.aio_buf = buf;
587
    if (nb_sectors < 0)
588
        acb->aiocb.aio_nbytes = -nb_sectors;
589
    else
590
        acb->aiocb.aio_nbytes = nb_sectors * 512;
591
    acb->aiocb.aio_offset = sector_num * 512;
592
    acb->next = posix_aio_state->first_aio;
593
    posix_aio_state->first_aio = acb;
594
    return acb;
595
}
596

    
597
static void raw_aio_em_cb(void* opaque)
598
{
599
    RawAIOCB *acb = opaque;
600
    acb->common.cb(acb->common.opaque, acb->ret);
601
    qemu_aio_release(acb);
602
}
603

    
604
/*
 * Unlink acb from the pending list and release it.  If acb is not on the
 * list, a message is printed to stderr and nothing is released.
 */
static void raw_aio_remove(RawAIOCB *acb)
{
    RawAIOCB **link = &posix_aio_state->first_aio;

    /* remove the callback from the queue */
    while (*link != NULL && *link != acb)
        link = &(*link)->next;

    if (*link == NULL) {
        fprintf(stderr, "raw_aio_remove: aio request not found!\n");
        return;
    }

    *link = acb->next;
    qemu_aio_release(acb);
}
622

    
623
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
624
        int64_t sector_num, uint8_t *buf, int nb_sectors,
625
        BlockDriverCompletionFunc *cb, void *opaque)
626
{
627
    RawAIOCB *acb;
628

    
629
    /*
630
     * If O_DIRECT is used and the buffer is not aligned fall back
631
     * to synchronous IO.
632
     */
633
    BDRVRawState *s = bs->opaque;
634

    
635
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
636
        QEMUBH *bh;
637
        acb = qemu_aio_get(bs, cb, opaque);
638
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
639
        bh = qemu_bh_new(raw_aio_em_cb, acb);
640
        qemu_bh_schedule(bh);
641
        return &acb->common;
642
    }
643

    
644
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
645
    if (!acb)
646
        return NULL;
647
    if (qemu_paio_read(&acb->aiocb) < 0) {
648
        raw_aio_remove(acb);
649
        return NULL;
650
    }
651
    return &acb->common;
652
}
653

    
654
static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
655
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
656
        BlockDriverCompletionFunc *cb, void *opaque)
657
{
658
    RawAIOCB *acb;
659

    
660
    /*
661
     * If O_DIRECT is used and the buffer is not aligned fall back
662
     * to synchronous IO.
663
     */
664
    BDRVRawState *s = bs->opaque;
665

    
666
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
667
        QEMUBH *bh;
668
        acb = qemu_aio_get(bs, cb, opaque);
669
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
670
        bh = qemu_bh_new(raw_aio_em_cb, acb);
671
        qemu_bh_schedule(bh);
672
        return &acb->common;
673
    }
674

    
675
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
676
    if (!acb)
677
        return NULL;
678
    if (qemu_paio_write(&acb->aiocb) < 0) {
679
        raw_aio_remove(acb);
680
        return NULL;
681
    }
682
    return &acb->common;
683
}
684

    
685
/*
 * Cancel a request.  If the AIO layer reports that it cannot be cancelled,
 * spin until it is no longer in progress; then unlink and release it.
 */
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
{
    RawAIOCB *req = (RawAIOCB *)blockacb;
    int status = qemu_paio_cancel(req->aiocb.aio_fildes, &req->aiocb);

    if (status == QEMU_PAIO_NOTCANCELED) {
        /* fail safe: if the aio could not be canceled, we wait for
           it */
        while (qemu_paio_error(&req->aiocb) == EINPROGRESS) {
            /* busy wait */
        }
    }

    raw_aio_remove(req);
}
699
#else /* CONFIG_AIO */
700
/* Without CONFIG_AIO there is nothing to set up; always reports success. */
static int posix_aio_init(void)
{
    const int status = 0;

    return status;
}
704
#endif /* CONFIG_AIO */
705

    
706

    
707
/*
 * Release the resources held by the raw driver state.
 *
 * The descriptor is closed if it is open.  The bounce buffer is freed
 * whether or not the descriptor is open (the floppy code closes the
 * descriptor on its own) and the pointer is cleared so that a repeated
 * close is harmless.
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
    }
    if (s->aligned_buf != NULL) {
        qemu_free(s->aligned_buf);
        s->aligned_buf = NULL;
    }
}
717

    
718
/*
 * Resize a regular-file image to offset bytes.
 *
 * Returns 0 on success, -ENOTSUP for anything that is not FTYPE_FILE, or
 * the negative errno from ftruncate().
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *raw = bs->opaque;

    if (raw->type != FTYPE_FILE)
        return -ENOTSUP;

    return (ftruncate(raw->fd, offset) < 0) ? -errno : 0;
}
727

    
728
#ifdef __OpenBSD__
729
/*
 * OpenBSD: length of the image in bytes.
 *
 * For character and block devices the size comes from the disklabel
 * (sector size times the size of the partition selected by the device
 * number); for anything else it is st_size.  Returns -1 if fstat() or the
 * DIOCGDINFO ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;
    struct disklabel label;
    struct stat info;

    if (fstat(raw->fd, &info) != 0)
        return -1;

    if (!S_ISCHR(info.st_mode) && !S_ISBLK(info.st_mode))
        return info.st_size;

    if (ioctl(raw->fd, DIOCGDINFO, &label) != 0)
        return -1;

    return (uint64_t)label.d_secsize *
        label.d_partitions[DISKPART(info.st_rdev)].p_size;
}
747
#else /* !__OpenBSD__ */
748
/*
 * Length of the image in bytes (all hosts except OpenBSD).
 *
 * fd_open() is called first; the descriptor is read from the state only
 * afterwards, because fd_open() may close and re-open it.
 *
 * Depending on the host, the size comes from a device ioctl or from
 * seeking to the end of the file.  Returns the negative value from
 * fd_open() on failure; a failing lseek() is passed through as its own
 * return value.
 */
static int64_t  raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
#ifdef HOST_BSD
    struct stat sb;
#endif
#ifdef __sun__
    struct dk_minfo minfo;
    int rv;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

    /* must be read after fd_open(), which may have replaced s->fd */
    fd = s->fd;

#ifdef HOST_BSD
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        /* the statement below is the fallback when the ioctl fails */
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
                struct partinfo pi;
                if (ioctl(fd, DIOCGPART, &pi) == 0)
                        size = pi.media_size;
                else
                        size = 0;
        }
        if (size == 0)
#endif
#ifdef CONFIG_COCOA
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
    } else
#endif
#ifdef __sun__
    /*
     * use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
    if ( rv != -1 ) {
        size = minfo.dki_lbsize * minfo.dki_capacity;
    } else /* there are reports that lseek on some devices
              fails, but irc discussion said that contingency
              on contingency was overkill */
#endif
    {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
803
#endif
804

    
805
static int raw_create(const char *filename, int64_t total_size,
806
                      const char *backing_file, int flags)
807
{
808
    int fd;
809

    
810
    if (flags || backing_file)
811
        return -ENOTSUP;
812

    
813
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
814
              0644);
815
    if (fd < 0)
816
        return -EIO;
817
    ftruncate(fd, total_size * 512);
818
    close(fd);
819
    return 0;
820
}
821

    
822
/* Flush the image's descriptor with fsync(); the result is not reported. */
static void raw_flush(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;
    int fd = raw->fd;

    fsync(fd);
}
827

    
828
BlockDriver bdrv_raw = {
829
    "raw",
830
    sizeof(BDRVRawState),
831
    NULL, /* no probe for protocols */
832
    raw_open,
833
    NULL,
834
    NULL,
835
    raw_close,
836
    raw_create,
837
    raw_flush,
838

    
839
#ifdef CONFIG_AIO
840
    .bdrv_aio_read = raw_aio_read,
841
    .bdrv_aio_write = raw_aio_write,
842
    .bdrv_aio_cancel = raw_aio_cancel,
843
    .aiocb_size = sizeof(RawAIOCB),
844
#endif
845

    
846
    .bdrv_pread = raw_pread,
847
    .bdrv_pwrite = raw_pwrite,
848
    .bdrv_truncate = raw_truncate,
849
    .bdrv_getlength = raw_getlength,
850
};
851

    
852
/***********************************************/
853
/* host device */
854

    
855
#ifdef CONFIG_COCOA
856
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
857
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
858

    
859
/*
 * Look up IOKit services of class kIOCDMediaClass whose ejectable key is
 * true and return an iterator over them in *mediaIterator.
 *
 * Failures of the individual IOKit calls are printed; the value returned
 * is the result of IOServiceGetMatchingServices().
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    mach_port_t             masterPort;
    CFMutableDictionaryRef  classesToMatch;
    kern_return_t           kernResult;

    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( kernResult != KERN_SUCCESS )
        printf( "IOMasterPort returned %d\n", kernResult );

    classesToMatch = IOServiceMatching( kIOCDMediaClass );
    if ( classesToMatch != NULL )
        CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
    else
        printf( "IOServiceMatching returned a NULL dictionary.\n" );

    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
    if ( kernResult != KERN_SUCCESS )
        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );

    return kernResult;
}
884

    
885
/*
 * Build the raw device path (_PATH_DEV "r" <BSD name>) of the next medium
 * returned by mediaIterator.
 *
 * bsdPath:     output buffer of maxPathSize bytes
 * maxPathSize: size of bsdPath; the prefix and the name must both fit
 *
 * Returns KERN_SUCCESS when the full path was written.  On any failure
 * KERN_FAILURE is returned and bsdPath is left as the empty string, so a
 * caller testing bsdPath[0] never sees a partial path.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    io_object_t     nextMedia;
    kern_return_t   kernResult = KERN_FAILURE;
    /* length of _PATH_DEV followed by the single character "r" */
    const size_t    prefixLength = strlen( _PATH_DEV ) + 1;

    if ( bsdPath == NULL || maxPathSize <= 0 )
        return KERN_FAILURE;

    *bsdPath = '\0';
    nextMedia = IOIteratorNext( mediaIterator );
    if ( nextMedia )
    {
        CFTypeRef   bsdPathAsCFString;
        bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
        if ( bsdPathAsCFString ) {
            /* only write the prefix when it fits with room for the NUL */
            if ( (size_t) maxPathSize > prefixLength ) {
                strcpy( bsdPath, _PATH_DEV );
                strcat( bsdPath, "r" );
                if ( CFStringGetCString( bsdPathAsCFString, bsdPath + prefixLength, maxPathSize - (CFIndex) prefixLength, kCFStringEncodingASCII ) ) {
                    kernResult = KERN_SUCCESS;
                } else {
                    /* do not leave a bare prefix behind */
                    *bsdPath = '\0';
                }
            }
            CFRelease( bsdPathAsCFString );
        }
        IOObjectRelease( nextMedia );
    }

    return kernResult;
}
910

    
911
#endif
912

    
913
/*
 * Open a host device (disk, CD-ROM, floppy, SCSI generic) as a raw image.
 *
 * bs:       block device whose opaque area holds the BDRVRawState
 * filename: device path; with CONFIG_COCOA a name starting with
 *           "/dev/cdrom" is replaced by the BSD path of an ejectable CD
 * flags:    BDRV_O_* flags (access mode and cache mode)
 *
 * Returns 0 on success or a negative errno value.  -EROFS is reported as
 * -EACCES.  As in raw_open(), a 512-byte aligned bounce buffer is
 * allocated when BDRV_O_NOCACHE is requested, so that unaligned requests
 * can be served with O_DIRECT in effect.
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;
#ifdef CONFIG_COCOA
    /* function scope: filename may point here until open() below */
    char bsdPath[ MAXPATHLEN ];
#endif

    posix_aio_init();

    s->lseek_err_cnt = 0;
    s->fd = -1;
    s->aligned_buf = NULL;

#ifdef CONFIG_COCOA
    if (strstart(filename, "/dev/cdrom", NULL)) {
        io_iterator_t mediaIterator = 0;
        size_t pathLength;
        int probe_fd;

        /* failures show up as an empty bsdPath below */
        (void) FindEjectableCDMedia( &mediaIterator );
        (void) GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );

        pathLength = strlen( bsdPath );
        /* need room for the "s0" suffix and the terminating NUL */
        if ( pathLength != 0 && pathLength + sizeof( "s0" ) <= sizeof( bsdPath ) ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            probe_fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (probe_fd < 0) {
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                close(probe_fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator )
            IOObjectRelease( mediaIterator );
    }
#endif
    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
    else if (!(flags & BDRV_O_CACHE_WB))
        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;
#if defined(__linux__)
    if (strstart(filename, "/dev/cd", NULL)) {
        /* open will not fail even if no CD is inserted */
        open_flags |= O_NONBLOCK;
        s->type = FTYPE_CD;
    } else if (strstart(filename, "/dev/fd", NULL)) {
        s->type = FTYPE_FD;
        s->fd_open_flags = open_flags;
        /* open will not fail even if no floppy is inserted */
        open_flags |= O_NONBLOCK;
    } else if (strstart(filename, "/dev/sg", NULL)) {
        bs->sg = 1;
    }
#endif
    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }

    if ((flags & BDRV_O_NOCACHE)) {
        /* bounce buffer used to satisfy the O_DIRECT alignment rules */
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;   /* saved before close() can overwrite errno */
            close(fd);
            return ret;
        }
    }

    s->fd = fd;
#if defined(__linux__)
    /* close fd so that we can reopen it as needed */
    if (s->type == FTYPE_FD) {
        close(s->fd);
        s->fd = -1;
        s->fd_media_changed = 1;
    }
#endif
    return 0;
}
993

    
994
#if defined(__linux__)
995
/* Note: we do not have a reliable method to detect if the floppy is
996
   present. The current method is to try to open the floppy at every
997
   I/O and to keep it opened during a few hundreds of ms. */
998
static int fd_open(BlockDriverState *bs)
999
{
1000
    BDRVRawState *s = bs->opaque;
1001
    int last_media_present;
1002

    
1003
    if (s->type != FTYPE_FD)
1004
        return 0;
1005
    last_media_present = (s->fd >= 0);
1006
    if (s->fd >= 0 &&
1007
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1008
        close(s->fd);
1009
        s->fd = -1;
1010
#ifdef DEBUG_FLOPPY
1011
        printf("Floppy closed\n");
1012
#endif
1013
    }
1014
    if (s->fd < 0) {
1015
        if (s->fd_got_error &&
1016
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1017
#ifdef DEBUG_FLOPPY
1018
            printf("No floppy (open delayed)\n");
1019
#endif
1020
            return -EIO;
1021
        }
1022
        s->fd = open(bs->filename, s->fd_open_flags);
1023
        if (s->fd < 0) {
1024
            s->fd_error_time = qemu_get_clock(rt_clock);
1025
            s->fd_got_error = 1;
1026
            if (last_media_present)
1027
                s->fd_media_changed = 1;
1028
#ifdef DEBUG_FLOPPY
1029
            printf("No floppy\n");
1030
#endif
1031
            return -EIO;
1032
        }
1033
#ifdef DEBUG_FLOPPY
1034
        printf("Floppy opened\n");
1035
#endif
1036
    }
1037
    if (!last_media_present)
1038
        s->fd_media_changed = 1;
1039
    s->fd_open_time = qemu_get_clock(rt_clock);
1040
    s->fd_got_error = 0;
1041
    return 0;
1042
}
1043

    
1044
/*
 * Report whether media is present in the host device.
 *
 * CD-ROM: 1 only when the CDROM_DRIVE_STATUS ioctl answers CDS_DISC_OK.
 * Floppy: 1 when fd_open() succeeds.
 * Any other type is always considered inserted.
 */
static int raw_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *raw = bs->opaque;

    if (raw->type == FTYPE_CD) {
        int status = ioctl(raw->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

        return status == CDS_DISC_OK ? 1 : 0;
    }

    if (raw->type == FTYPE_FD) {
        return fd_open(bs) >= 0;
    }

    return 1;
}
1064

    
1065
/* currently only used by fdc.c, but a CD version would be good too */
1066
static int raw_media_changed(BlockDriverState *bs)
1067
{
1068
    BDRVRawState *s = bs->opaque;
1069

    
1070
    switch(s->type) {
1071
    case FTYPE_FD:
1072
        {
1073
            int ret;
1074
            /* XXX: we do not have a true media changed indication. It
1075
               does not work if the floppy is changed without trying
1076
               to read it */
1077
            fd_open(bs);
1078
            ret = s->fd_media_changed;
1079
            s->fd_media_changed = 0;
1080
#ifdef DEBUG_FLOPPY
1081
            printf("Floppy changed=%d\n", ret);
1082
#endif
1083
            return ret;
1084
        }
1085
    default:
1086
        return -ENOTSUP;
1087
    }
1088
}
1089

    
1090
/*
 * Eject or load the media of a host removable device.
 *
 * CD-ROM: a non-zero eject_flag issues CDROMEJECT, zero issues
 * CDROMCLOSETRAY.
 * Floppy: the cached descriptor is closed and FDEJECT is issued on a
 * freshly opened non-blocking descriptor, whatever eject_flag is.
 *
 * ioctl failures are only reported through perror(), and a failed floppy
 * open is ignored; the function still returns 0 in those cases.
 * Returns -ENOTSUP for any other device type.
 */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    BDRVRawState *s = bs->opaque;

    switch (s->type) {
    case FTYPE_CD:
        if (eject_flag) {
            if (ioctl(s->fd, CDROMEJECT, NULL) < 0) {
                perror("CDROMEJECT");
            }
        } else {
            if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) {
                /* Name the ioctl that actually failed. */
                perror("CDROMCLOSETRAY");
            }
        }
        break;
    case FTYPE_FD:
        {
            int fd;

            /* Drop the cached descriptor before ejecting. */
            if (s->fd >= 0) {
                close(s->fd);
                s->fd = -1;
            }
            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
            if (fd >= 0) {
                if (ioctl(fd, FDEJECT, 0) < 0) {
                    perror("FDEJECT");
                }
                close(fd);
            }
        }
        break;
    default:
        return -ENOTSUP;
    }
    return 0;
}
1124

    
1125
/*
 * Lock or unlock the tray of a host CD-ROM via CDROM_LOCKDOOR.
 *
 * Returns 0 for CD-ROM devices even when the ioctl fails, and -ENOTSUP
 * for every other device type.
 */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    BDRVRawState *raw = bs->opaque;

    if (raw->type != FTYPE_CD) {
        return -ENOTSUP;
    }

    /* The result is deliberately ignored: an error can happen if the
       distribution automatically mounts the CD-ROM. */
    (void)(ioctl(raw->fd, CDROM_LOCKDOOR, locked) < 0);
    return 0;
}
1142

    
1143
/*
 * Forward an ioctl request to the host descriptor.
 *
 * The value returned by ioctl() is handed back unchanged; it is not
 * converted to a negative errno code here.
 */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *raw = bs->opaque;
    int rc;

    rc = ioctl(raw->fd, req, buf);
    return rc;
}
1149
#else
1150

    
1151
/* Non-Linux build: no floppy descriptor handling, always reports success. */
static int fd_open(BlockDriverState *bs)
{
    (void)bs; /* unused */

    return 0;
}
1155

    
1156
/* Non-Linux build: media is always reported as inserted. */
static int raw_is_inserted(BlockDriverState *bs)
{
    (void)bs; /* unused */

    return 1;
}
1160

    
1161
/* Non-Linux build: media-change detection is not supported. */
static int raw_media_changed(BlockDriverState *bs)
{
    (void)bs; /* unused */

    return -ENOTSUP;
}
1165

    
1166
/* Non-Linux build: ejecting host media is not supported. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    (void)bs;         /* unused */
    (void)eject_flag; /* unused */

    return -ENOTSUP;
}
1170

    
1171
/* Non-Linux build: locking the host tray is not supported. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    (void)bs;     /* unused */
    (void)locked; /* unused */

    return -ENOTSUP;
}
1175

    
1176
/* Non-Linux build: pass-through ioctls are not supported. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    (void)bs;  /* unused */
    (void)req; /* unused */
    (void)buf; /* unused */

    return -ENOTSUP;
}
1180
#endif /* !linux */
1181

    
1182
/*
 * Send a generic SCSI command: write count bytes from buf through
 * raw_pwrite() with an offset of -1.
 * NOTE(review): the -1 offset presumably means "no positioning" to
 * raw_pwrite() -- confirm against its definition.
 */
static int raw_sg_send_command(BlockDriverState *bs, void *buf, int count)
{
    int written = raw_pwrite(bs, -1, buf, count);

    return written;
}
1186

    
1187
/*
 * Receive a generic SCSI response: read up to count bytes into buf through
 * raw_pread() with an offset of -1.
 * NOTE(review): the -1 offset presumably means "no positioning" to
 * raw_pread() -- confirm against its definition.
 */
static int raw_sg_recv_response(BlockDriverState *bs, void *buf, int count)
{
    int received = raw_pread(bs, -1, buf, count);

    return received;
}
1191

    
1192
static BlockDriverAIOCB *raw_sg_aio_read(BlockDriverState *bs,
1193
                                         void *buf, int count,
1194
                                         BlockDriverCompletionFunc *cb,
1195
                                         void *opaque)
1196
{
1197
    return raw_aio_read(bs, 0, buf, -(int64_t)count, cb, opaque);
1198
}
1199

    
1200
static BlockDriverAIOCB *raw_sg_aio_write(BlockDriverState *bs,
1201
                                          void *buf, int count,
1202
                                          BlockDriverCompletionFunc *cb,
1203
                                          void *opaque)
1204
{
1205
    return raw_aio_write(bs, 0, buf, -(int64_t)count, cb, opaque);
1206
}
1207

    
1208
BlockDriver bdrv_host_device = {
1209
    .format_name        = "host_device",
1210
    .instance_size        = sizeof(BDRVRawState),
1211
    .bdrv_open                = hdev_open,
1212
    .bdrv_close                = raw_close,
1213
    .bdrv_flush                = raw_flush,
1214

    
1215
#ifdef CONFIG_AIO
1216
    .bdrv_aio_read        = raw_aio_read,
1217
    .bdrv_aio_write        = raw_aio_write,
1218
    .bdrv_aio_cancel        = raw_aio_cancel,
1219
    .aiocb_size                = sizeof(RawAIOCB),
1220
#endif
1221

    
1222
    .bdrv_pread                = raw_pread,
1223
    .bdrv_pwrite        = raw_pwrite,
1224
    .bdrv_getlength        = raw_getlength,
1225

    
1226
    /* removable device support */
1227
    .bdrv_is_inserted        = raw_is_inserted,
1228
    .bdrv_media_changed        = raw_media_changed,
1229
    .bdrv_eject                = raw_eject,
1230
    .bdrv_set_locked        = raw_set_locked,
1231
    /* generic scsi device */
1232
    .bdrv_ioctl                = raw_ioctl,
1233
    .bdrv_sg_send_command  = raw_sg_send_command,
1234
    .bdrv_sg_recv_response = raw_sg_recv_response,
1235
    .bdrv_sg_aio_read      = raw_sg_aio_read,
1236
    .bdrv_sg_aio_write     = raw_sg_aio_write,
1237
};