Statistics
| Branch: | Revision:

root / block-raw-posix.c @ 9f7965c7

History | View | Annotate | Download (32.4 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "qemu-char.h"
27
#include "block_int.h"
28
#include <assert.h>
29
#ifdef CONFIG_AIO
30
#include <aio.h>
31
#endif
32

    
33
#ifdef CONFIG_COCOA
34
#include <paths.h>
35
#include <sys/param.h>
36
#include <IOKit/IOKitLib.h>
37
#include <IOKit/IOBSD.h>
38
#include <IOKit/storage/IOMediaBSDClient.h>
39
#include <IOKit/storage/IOMedia.h>
40
#include <IOKit/storage/IOCDMedia.h>
41
//#include <IOKit/storage/IOCDTypes.h>
42
#include <CoreFoundation/CoreFoundation.h>
43
#endif
44

    
45
#ifdef __sun__
46
#define _POSIX_PTHREAD_SEMANTICS 1
47
#include <signal.h>
48
#include <sys/dkio.h>
49
#endif
50
#ifdef __linux__
51
#include <sys/ioctl.h>
52
#include <linux/cdrom.h>
53
#include <linux/fd.h>
54
#endif
55
#ifdef __FreeBSD__
56
#include <signal.h>
57
#include <sys/disk.h>
58
#endif
59

    
60
#ifdef __OpenBSD__
61
#include <sys/ioctl.h>
62
#include <sys/disklabel.h>
63
#include <sys/dkio.h>
64
#endif
65

    
66
/* Uncomment to get floppy open/close traces on stdout. */
//#define DEBUG_FLOPPY

/* Uncomment to route failed seek/read/write reports through
   DEBUG_BLOCK_PRINT. */
//#define DEBUG_BLOCK

#if !defined(DEBUG_BLOCK)
/* Tracing disabled: the macro swallows its arguments. */
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#else
/* Tracing enabled: write to logfile and flush it, but only while
   loglevel is non-zero. */
#define DEBUG_BLOCK_PRINT(formatCstr, args...)          \
    do {                                                \
        if (loglevel != 0) {                            \
            fprintf(logfile, formatCstr, ##args);       \
            fflush(logfile);                            \
        }                                               \
    } while (0)
#endif

/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
#ifndef O_DIRECT
#define O_DIRECT O_DSYNC
#endif

/* Values stored in BDRVRawState.type */
#define FTYPE_FILE   0
#define FTYPE_CD     1
#define FTYPE_FD     2

/* Size in bytes of the bounce buffer used for unaligned requests:
   32 sectors of 512 bytes. */
#define ALIGNED_BUFFER_SIZE (32 * 512)

/* if the FD is not accessed during that time (in ms), we try to
   reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000

/* posix-aio doesn't allow multiple outstanding requests to a single file
 * descriptor.  we implement a pool of dup()'d file descriptors to work
 * around this */
#define RAW_FD_POOL_SIZE        64
95

    
96
typedef struct BDRVRawState {
97
    int fd;
98
    int type;
99
    unsigned int lseek_err_cnt;
100
    int fd_pool[RAW_FD_POOL_SIZE];
101
#if defined(__linux__)
102
    /* linux floppy specific */
103
    int fd_open_flags;
104
    int64_t fd_open_time;
105
    int64_t fd_error_time;
106
    int fd_got_error;
107
    int fd_media_changed;
108
#endif
109
    uint8_t* aligned_buf;
110
} BDRVRawState;
111

    
112
static int posix_aio_init(void);
113

    
114
static int fd_open(BlockDriverState *bs);
115

    
116
/*
 * Open a regular image file and initialise the driver state in bs->opaque.
 *
 * flags is a combination of BDRV_O_* values: the access mode selects
 * O_RDWR or O_RDONLY (the latter also marks bs read-only), BDRV_O_CREAT
 * creates/truncates the file, BDRV_O_NOCACHE selects O_DIRECT and
 * allocates the bounce buffer, and the absence of BDRV_O_CACHE_WB selects
 * O_DSYNC.
 *
 * Returns 0 on success or a negative errno value.  On failure no
 * descriptor is left open and s->fd is -1.
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;
    int i;

    /* Without the AIO state the asynchronous paths cannot work, so a
       failed initialisation fails the open instead of being ignored. */
    ret = posix_aio_init();
    if (ret < 0)
        return ret;

    /* Put the state into a well-defined "closed" shape first so that
       every error path below leaves it consistent. */
    s->lseek_err_cnt = 0;
    s->type = FTYPE_FILE;
    s->fd = -1;
    s->aligned_buf = NULL;
    for (i = 0; i < RAW_FD_POOL_SIZE; i++)
        s->fd_pool[i] = -1;

    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    if (flags & BDRV_O_CREAT)
        open_flags |= O_CREAT | O_TRUNC;

    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
    else if (!(flags & BDRV_O_CACHE_WB))
        open_flags |= O_DSYNC;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        /* a read-only filesystem is reported as a permission problem */
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }

    if ((flags & BDRV_O_NOCACHE)) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            /* NOTE(review): qemu_memalign is not visible here and is not
               known to set errno, so a fixed code is reported. */
            close(fd);
            return -ENOMEM;
        }
    }

    s->fd = fd;
    return 0;
}
166

    
167
/* XXX: use the host sector size if necessary, with something like:
#ifdef DIOCGSECTORSIZE
        {
            unsigned int sectorsize = 512;
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
                sectorsize > bufsize)
                bufsize = sectorsize;
        }
#endif
#ifdef CONFIG_COCOA
        u_int32_t   blockSize = 512;
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
            bufsize = blockSize;
        }
#endif
*/
183

    
184
/*
185
 * offset and count are in bytes, but must be multiples of 512 for files
186
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
187
 *
188
 * This function may be called without alignment if the caller ensures
189
 * that O_DIRECT is not in effect.
190
 */
191
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
192
                     uint8_t *buf, int count)
193
{
194
    BDRVRawState *s = bs->opaque;
195
    int ret;
196

    
197
    ret = fd_open(bs);
198
    if (ret < 0)
199
        return ret;
200

    
201
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
202
        ++(s->lseek_err_cnt);
203
        if(s->lseek_err_cnt <= 10) {
204
            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
205
                              "] lseek failed : %d = %s\n",
206
                              s->fd, bs->filename, offset, buf, count,
207
                              bs->total_sectors, errno, strerror(errno));
208
        }
209
        return -1;
210
    }
211
    s->lseek_err_cnt=0;
212

    
213
    ret = read(s->fd, buf, count);
214
    if (ret == count)
215
        goto label__raw_read__success;
216

    
217
    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
218
                      "] read failed %d : %d = %s\n",
219
                      s->fd, bs->filename, offset, buf, count,
220
                      bs->total_sectors, ret, errno, strerror(errno));
221

    
222
    /* Try harder for CDrom. */
223
    if (bs->type == BDRV_TYPE_CDROM) {
224
        lseek(s->fd, offset, SEEK_SET);
225
        ret = read(s->fd, buf, count);
226
        if (ret == count)
227
            goto label__raw_read__success;
228
        lseek(s->fd, offset, SEEK_SET);
229
        ret = read(s->fd, buf, count);
230
        if (ret == count)
231
            goto label__raw_read__success;
232

    
233
        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
234
                          "] retry read failed %d : %d = %s\n",
235
                          s->fd, bs->filename, offset, buf, count,
236
                          bs->total_sectors, ret, errno, strerror(errno));
237
    }
238

    
239
label__raw_read__success:
240

    
241
    return ret;
242
}
243

    
244
/*
245
 * offset and count are in bytes, but must be multiples of 512 for files
246
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
247
 *
248
 * This function may be called without alignment if the caller ensures
249
 * that O_DIRECT is not in effect.
250
 */
251
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
252
                      const uint8_t *buf, int count)
253
{
254
    BDRVRawState *s = bs->opaque;
255
    int ret;
256

    
257
    ret = fd_open(bs);
258
    if (ret < 0)
259
        return ret;
260

    
261
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
262
        ++(s->lseek_err_cnt);
263
        if(s->lseek_err_cnt) {
264
            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
265
                              PRId64 "] lseek failed : %d = %s\n",
266
                              s->fd, bs->filename, offset, buf, count,
267
                              bs->total_sectors, errno, strerror(errno));
268
        }
269
        return -1;
270
    }
271
    s->lseek_err_cnt = 0;
272

    
273
    ret = write(s->fd, buf, count);
274
    if (ret == count)
275
        goto label__raw_write__success;
276

    
277
    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
278
                      "] write failed %d : %d = %s\n",
279
                      s->fd, bs->filename, offset, buf, count,
280
                      bs->total_sectors, ret, errno, strerror(errno));
281

    
282
label__raw_write__success:
283

    
284
    return ret;
285
}
286

    
287

    
288
/*
289
 * offset and count are in bytes and possibly not aligned. For files opened
290
 * with O_DIRECT, necessary alignments are ensured before calling
291
 * raw_pread_aligned to do the actual read.
292
 */
293
static int raw_pread(BlockDriverState *bs, int64_t offset,
294
                     uint8_t *buf, int count)
295
{
296
    BDRVRawState *s = bs->opaque;
297
    int size, ret, shift, sum;
298

    
299
    sum = 0;
300

    
301
    if (s->aligned_buf != NULL)  {
302

    
303
        if (offset & 0x1ff) {
304
            /* align offset on a 512 bytes boundary */
305

    
306
            shift = offset & 0x1ff;
307
            size = (shift + count + 0x1ff) & ~0x1ff;
308
            if (size > ALIGNED_BUFFER_SIZE)
309
                size = ALIGNED_BUFFER_SIZE;
310
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
311
            if (ret < 0)
312
                return ret;
313

    
314
            size = 512 - shift;
315
            if (size > count)
316
                size = count;
317
            memcpy(buf, s->aligned_buf + shift, size);
318

    
319
            buf += size;
320
            offset += size;
321
            count -= size;
322
            sum += size;
323

    
324
            if (count == 0)
325
                return sum;
326
        }
327
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
328

    
329
            /* read on aligned buffer */
330

    
331
            while (count) {
332

    
333
                size = (count + 0x1ff) & ~0x1ff;
334
                if (size > ALIGNED_BUFFER_SIZE)
335
                    size = ALIGNED_BUFFER_SIZE;
336

    
337
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
338
                if (ret < 0)
339
                    return ret;
340

    
341
                size = ret;
342
                if (size > count)
343
                    size = count;
344

    
345
                memcpy(buf, s->aligned_buf, size);
346

    
347
                buf += size;
348
                offset += size;
349
                count -= size;
350
                sum += size;
351
            }
352

    
353
            return sum;
354
        }
355
    }
356

    
357
    return raw_pread_aligned(bs, offset, buf, count) + sum;
358
}
359

    
360
/*
361
 * offset and count are in bytes and possibly not aligned. For files opened
362
 * with O_DIRECT, necessary alignments are ensured before calling
363
 * raw_pwrite_aligned to do the actual write.
364
 */
365
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
366
                      const uint8_t *buf, int count)
367
{
368
    BDRVRawState *s = bs->opaque;
369
    int size, ret, shift, sum;
370

    
371
    sum = 0;
372

    
373
    if (s->aligned_buf != NULL) {
374

    
375
        if (offset & 0x1ff) {
376
            /* align offset on a 512 bytes boundary */
377
            shift = offset & 0x1ff;
378
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
379
            if (ret < 0)
380
                return ret;
381

    
382
            size = 512 - shift;
383
            if (size > count)
384
                size = count;
385
            memcpy(s->aligned_buf + shift, buf, size);
386

    
387
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
388
            if (ret < 0)
389
                return ret;
390

    
391
            buf += size;
392
            offset += size;
393
            count -= size;
394
            sum += size;
395

    
396
            if (count == 0)
397
                return sum;
398
        }
399
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
400

    
401
            while ((size = (count & ~0x1ff)) != 0) {
402

    
403
                if (size > ALIGNED_BUFFER_SIZE)
404
                    size = ALIGNED_BUFFER_SIZE;
405

    
406
                memcpy(s->aligned_buf, buf, size);
407

    
408
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
409
                if (ret < 0)
410
                    return ret;
411

    
412
                buf += ret;
413
                offset += ret;
414
                count -= ret;
415
                sum += ret;
416
            }
417
            /* here, count < 512 because (count & ~0x1ff) == 0 */
418
            if (count) {
419
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
420
                if (ret < 0)
421
                    return ret;
422
                 memcpy(s->aligned_buf, buf, count);
423

    
424
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
425
                 if (ret < 0)
426
                     return ret;
427
                 if (count < ret)
428
                     ret = count;
429

    
430
                 sum += ret;
431
            }
432
            return sum;
433
        }
434
    }
435
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
436
}
437

    
438
#ifdef CONFIG_AIO
439
/***********************************************************/
440
/* Unix AIO using POSIX AIO */
441

    
442
typedef struct RawAIOCB {
443
    BlockDriverAIOCB common;
444
    int fd;
445
    struct aiocb aiocb;
446
    struct RawAIOCB *next;
447
    int ret;
448
} RawAIOCB;
449

    
450
typedef struct PosixAioState
451
{
452
    int rfd, wfd;
453
    RawAIOCB *first_aio;
454
} PosixAioState;
455

    
456
/*
 * Pick a descriptor for a new AIO request: every free pool slot (-1) is
 * tried in order with dup(s->fd) and the first successful duplicate is
 * returned.  If no slot is free or no dup() succeeds, the main descriptor
 * s->fd is returned instead.
 */
static int raw_fd_pool_get(BDRVRawState *s)
{
    int *slot = s->fd_pool;
    int *const end = s->fd_pool + RAW_FD_POOL_SIZE;

    for (; slot != end; slot++) {
        if (*slot == -1) {
            /* free slot: try to dup file descriptor */
            *slot = dup(s->fd);
            if (*slot != -1)
                return *slot;
        }
        /* otherwise the slot is already in use */
    }

    /* we couldn't dup the file descriptor so just use the main one */
    return s->fd;
}
474

    
475
/*
 * Give back the descriptor used by acb: every pool slot holding acb->fd
 * is closed and marked free (-1).  Nothing happens when acb->fd is not in
 * the pool, e.g. when the request ran on the main descriptor.
 */
static void raw_fd_pool_put(RawAIOCB *acb)
{
    BDRVRawState *s = acb->common.bs->opaque;
    int idx = 0;

    while (idx < RAW_FD_POOL_SIZE) {
        int *slot = &s->fd_pool[idx++];

        if (*slot != acb->fd)
            continue;
        close(*slot);
        *slot = -1;
    }
}
487

    
488
static void posix_aio_read(void *opaque)
489
{
490
    PosixAioState *s = opaque;
491
    RawAIOCB *acb, **pacb;
492
    int ret;
493
    ssize_t len;
494

    
495
    do {
496
        char byte;
497

    
498
        len = read(s->rfd, &byte, 1);
499
        if (len == -1 && errno == EINTR)
500
            continue;
501
        if (len == -1 && errno == EAGAIN)
502
            break;
503
    } while (len == -1);
504

    
505
    for(;;) {
506
        pacb = &s->first_aio;
507
        for(;;) {
508
            acb = *pacb;
509
            if (!acb)
510
                goto the_end;
511
            ret = aio_error(&acb->aiocb);
512
            if (ret == ECANCELED) {
513
                /* remove the request */
514
                *pacb = acb->next;
515
                raw_fd_pool_put(acb);
516
                qemu_aio_release(acb);
517
            } else if (ret != EINPROGRESS) {
518
                /* end of aio */
519
                if (ret == 0) {
520
                    ret = aio_return(&acb->aiocb);
521
                    if (ret == acb->aiocb.aio_nbytes)
522
                        ret = 0;
523
                    else
524
                        ret = -EINVAL;
525
                } else {
526
                    ret = -ret;
527
                }
528
                /* remove the request */
529
                *pacb = acb->next;
530
                /* call the callback */
531
                acb->common.cb(acb->common.opaque, ret);
532
                raw_fd_pool_put(acb);
533
                qemu_aio_release(acb);
534
                break;
535
            } else {
536
                pacb = &acb->next;
537
            }
538
        }
539
    }
540
 the_end: ;
541
}
542

    
543
static int posix_aio_flush(void *opaque)
544
{
545
    PosixAioState *s = opaque;
546
    return !!s->first_aio;
547
}
548

    
549
static PosixAioState *posix_aio_state;
550

    
551
static void aio_signal_handler(int signum)
552
{
553
    if (posix_aio_state) {
554
        char byte = 0;
555

    
556
        write(posix_aio_state->wfd, &byte, sizeof(byte));
557
    }
558

    
559
    qemu_service_io();
560
}
561

    
562
static int posix_aio_init(void)
563
{
564
    struct sigaction act;
565
    PosixAioState *s;
566
    int fds[2];
567
  
568
    if (posix_aio_state)
569
        return 0;
570

    
571
    s = qemu_malloc(sizeof(PosixAioState));
572
    if (s == NULL)
573
        return -ENOMEM;
574

    
575
    sigfillset(&act.sa_mask);
576
    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
577
    act.sa_handler = aio_signal_handler;
578
    sigaction(SIGUSR2, &act, NULL);
579

    
580
    s->first_aio = NULL;
581
    if (pipe(fds) == -1) {
582
        fprintf(stderr, "failed to create pipe\n");
583
        return -errno;
584
    }
585

    
586
    s->rfd = fds[0];
587
    s->wfd = fds[1];
588

    
589
    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
590

    
591
    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
592

    
593
#if defined(__linux__)
594
    {
595
        struct aioinit ai;
596

    
597
        memset(&ai, 0, sizeof(ai));
598
#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 4)
599
        ai.aio_threads = 64;
600
        ai.aio_num = 64;
601
#else
602
        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
603
           seems to fix the problem. */
604
        ai.aio_threads = 1;
605
        ai.aio_num = 1;
606
        ai.aio_idle_time = 365 * 100000;
607
#endif
608
        aio_init(&ai);
609
    }
610
#endif
611
    posix_aio_state = s;
612

    
613
    return 0;
614
}
615

    
616
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
617
        int64_t sector_num, uint8_t *buf, int nb_sectors,
618
        BlockDriverCompletionFunc *cb, void *opaque)
619
{
620
    BDRVRawState *s = bs->opaque;
621
    RawAIOCB *acb;
622

    
623
    if (fd_open(bs) < 0)
624
        return NULL;
625

    
626
    acb = qemu_aio_get(bs, cb, opaque);
627
    if (!acb)
628
        return NULL;
629
    acb->fd = raw_fd_pool_get(s);
630
    acb->aiocb.aio_fildes = acb->fd;
631
    acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
632
    acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
633
    acb->aiocb.aio_buf = buf;
634
    if (nb_sectors < 0)
635
        acb->aiocb.aio_nbytes = -nb_sectors;
636
    else
637
        acb->aiocb.aio_nbytes = nb_sectors * 512;
638
    acb->aiocb.aio_offset = sector_num * 512;
639
    acb->next = posix_aio_state->first_aio;
640
    posix_aio_state->first_aio = acb;
641
    return acb;
642
}
643

    
644
static void raw_aio_em_cb(void* opaque)
645
{
646
    RawAIOCB *acb = opaque;
647
    acb->common.cb(acb->common.opaque, acb->ret);
648
    qemu_aio_release(acb);
649
}
650

    
651
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
652
        int64_t sector_num, uint8_t *buf, int nb_sectors,
653
        BlockDriverCompletionFunc *cb, void *opaque)
654
{
655
    RawAIOCB *acb;
656

    
657
    /*
658
     * If O_DIRECT is used and the buffer is not aligned fall back
659
     * to synchronous IO.
660
     */
661
    BDRVRawState *s = bs->opaque;
662

    
663
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
664
        QEMUBH *bh;
665
        acb = qemu_aio_get(bs, cb, opaque);
666
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
667
        bh = qemu_bh_new(raw_aio_em_cb, acb);
668
        qemu_bh_schedule(bh);
669
        return &acb->common;
670
    }
671

    
672
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
673
    if (!acb)
674
        return NULL;
675
    if (aio_read(&acb->aiocb) < 0) {
676
        qemu_aio_release(acb);
677
        return NULL;
678
    }
679
    return &acb->common;
680
}
681

    
682
static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
683
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
684
        BlockDriverCompletionFunc *cb, void *opaque)
685
{
686
    RawAIOCB *acb;
687

    
688
    /*
689
     * If O_DIRECT is used and the buffer is not aligned fall back
690
     * to synchronous IO.
691
     */
692
    BDRVRawState *s = bs->opaque;
693

    
694
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
695
        QEMUBH *bh;
696
        acb = qemu_aio_get(bs, cb, opaque);
697
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
698
        bh = qemu_bh_new(raw_aio_em_cb, acb);
699
        qemu_bh_schedule(bh);
700
        return &acb->common;
701
    }
702

    
703
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
704
    if (!acb)
705
        return NULL;
706
    if (aio_write(&acb->aiocb) < 0) {
707
        qemu_aio_release(acb);
708
        return NULL;
709
    }
710
    return &acb->common;
711
}
712

    
713
/*
 * Cancel a request started by raw_aio_read/raw_aio_write.  If the
 * operation cannot be cancelled, this waits until it has finished.  The
 * request is then taken off the pending list, its pool descriptor is
 * returned and the control block is released; the completion callback is
 * not invoked from here.
 */
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
{
    int ret;
    RawAIOCB *acb = (RawAIOCB *)blockacb;
    RawAIOCB **pacb;

    ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
    if (ret == AIO_NOTCANCELED) {
        /* fail safe: if the aio could not be canceled, we wait for
           it */
        while (aio_error(&acb->aiocb) == EINPROGRESS);
    }

    /* remove the callback from the queue: step through the links of the
       list itself so that a request at any position is found */
    for (pacb = &posix_aio_state->first_aio; *pacb != NULL;
         pacb = &(*pacb)->next) {
        if (*pacb == acb) {
            *pacb = acb->next;
            raw_fd_pool_put(acb);
            qemu_aio_release(acb);
            break;
        }
    }
}
740

    
741
#else /* CONFIG_AIO */
742
/* Built without CONFIG_AIO: there is nothing to set up, always succeeds. */
static int posix_aio_init(void)
{
    const int success = 0;

    return success;
}
746
#endif /* CONFIG_AIO */
747

    
748
/* Close every descriptor currently held in the pool and mark all of its
   slots free (-1). */
static void raw_close_fd_pool(BDRVRawState *s)
{
    int *slot;

    for (slot = s->fd_pool; slot < s->fd_pool + RAW_FD_POOL_SIZE; slot++) {
        if (*slot == -1)
            continue;
        close(*slot);
        *slot = -1;
    }
}
759

    
760
/*
 * Release everything the driver holds for bs: the main descriptor, the
 * bounce buffer and the pooled descriptors.  Pointers and descriptors are
 * reset afterwards so that the state holds no stale values.
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
    }
    /* The buffer is released even when the descriptor is already closed. */
    if (s->aligned_buf != NULL) {
        qemu_free(s->aligned_buf);
        s->aligned_buf = NULL;
    }
    raw_close_fd_pool(s);
}
771

    
772
/*
 * Set the size of the image to offset bytes.  Only FTYPE_FILE images are
 * supported.  Returns 0 on success, -ENOTSUP for other types, or a
 * negative errno value from ftruncate().
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *s = bs->opaque;
    int rc;

    if (s->type != FTYPE_FILE)
        return -ENOTSUP;

    rc = ftruncate(s->fd, offset);
    return (rc < 0) ? -errno : 0;
}
781

    
782
#ifdef __OpenBSD__
783
/*
 * OpenBSD: size of the image in bytes.  Character and block devices are
 * sized from their disklabel (sector size times the size of the partition
 * selected by the device number); anything else reports st_size.
 * Returns -1 if fstat() or the disklabel ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct stat st;
    struct disklabel dl;

    if (fstat(s->fd, &st) != 0)
        return -1;

    if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
        return st.st_size;

    if (ioctl(s->fd, DIOCGDINFO, &dl) != 0)
        return -1;

    return (uint64_t)dl.d_secsize *
        dl.d_partitions[DISKPART(st.st_rdev)].p_size;
}
801
#else /* !__OpenBSD__ */
802
/*
 * Size of the image in bytes, or a negative value on failure (the result
 * of fd_open(), or -1 from lseek()).
 *
 * The descriptor is read from the state only after fd_open() has run,
 * because fd_open() may close and replace s->fd.
 *
 * BSD character devices are sized with DIOCGMEDIASIZE where available
 * (otherwise LONG_LONG_MAX under CONFIG_COCOA, or by seeking to the end),
 * Solaris asks DKIOCGMEDIAINFO first, and everything else seeks to the
 * end of the file.
 */
static int64_t  raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
#ifdef _BSD
    struct stat sb;
#endif
#ifdef __sun__
    struct dk_minfo minfo;
    int rv;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

    /* only now is s->fd the descriptor that is really open */
    fd = s->fd;

#ifdef _BSD
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#endif
#ifdef CONFIG_COCOA
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
    } else
#endif
#ifdef __sun__
    /*
     * use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
    if ( rv != -1 ) {
        size = minfo.dki_lbsize * minfo.dki_capacity;
    } else /* there are reports that lseek on some devices
              fails, but irc discussion said that contingency
              on contingency was overkill */
#endif
    {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
848
#endif
849

    
850
/*
 * Create (or truncate) a raw image file and size it to total_size
 * 512-byte sectors.
 *
 * Returns 0 on success, -ENOTSUP if flags or a backing file are given,
 * -EIO if the file cannot be opened, or a negative errno value if sizing
 * or closing the file fails.
 */
static int raw_create(const char *filename, int64_t total_size,
                      const char *backing_file, int flags)
{
    int fd;
    int ret = 0;

    if (flags || backing_file)
        return -ENOTSUP;

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
              0644);
    if (fd < 0)
        return -EIO;

    /* a file that could not be sized is not a usable image: report it */
    if (ftruncate(fd, total_size * 512) != 0)
        ret = -errno;
    /* keep the first error if close() fails as well */
    if (close(fd) != 0 && ret == 0)
        ret = -errno;
    return ret;
}
866

    
867
/* Flush the image by calling fsync() on its main descriptor; the result
   of fsync() is not reported. */
static void raw_flush(BlockDriverState *bs)
{
    const BDRVRawState *state = bs->opaque;

    fsync(state->fd);
}
872

    
873
BlockDriver bdrv_raw = {
874
    "raw",
875
    sizeof(BDRVRawState),
876
    NULL, /* no probe for protocols */
877
    raw_open,
878
    NULL,
879
    NULL,
880
    raw_close,
881
    raw_create,
882
    raw_flush,
883

    
884
#ifdef CONFIG_AIO
885
    .bdrv_aio_read = raw_aio_read,
886
    .bdrv_aio_write = raw_aio_write,
887
    .bdrv_aio_cancel = raw_aio_cancel,
888
    .aiocb_size = sizeof(RawAIOCB),
889
#endif
890
    .bdrv_pread = raw_pread,
891
    .bdrv_pwrite = raw_pwrite,
892
    .bdrv_truncate = raw_truncate,
893
    .bdrv_getlength = raw_getlength,
894
};
895

    
896
/***********************************************/
897
/* host device */
898

    
899
#ifdef CONFIG_COCOA
900
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
901
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
902

    
903
/*
 * Ask IOKit for all services of class kIOCDMediaClass whose
 * kIOMediaEjectableKey property is true and store the resulting iterator
 * in *mediaIterator.
 *
 * Failures of the individual steps are printed but do not stop the
 * function; the value returned is the result of
 * IOServiceGetMatchingServices().
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    mach_port_t             masterPort;
    CFMutableDictionaryRef  matching;
    kern_return_t           status;

    status = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( status != KERN_SUCCESS ) {
        printf( "IOMasterPort returned %d\n", status );
    }

    matching = IOServiceMatching( kIOCDMediaClass );
    if ( matching != NULL ) {
        /* restrict the match to ejectable media */
        CFDictionarySetValue( matching, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
    } else {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
    }

    status = IOServiceGetMatchingServices( masterPort, matching, mediaIterator );
    if ( status != KERN_SUCCESS ) {
        printf( "IOServiceGetMatchingServices returned %d\n", status );
    }

    return status;
}
928

    
929
/*
 * Build the raw device path (_PATH_DEV followed by "r" and the BSD name)
 * of the next media object delivered by mediaIterator.
 *
 * bsdPath receives the path and must have room for maxPathSize bytes.
 * Returns KERN_SUCCESS when the path was built.  On every failure the
 * function returns KERN_FAILURE and bsdPath is the empty string, so a
 * caller that only looks at bsdPath never sees a half-built path.
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    /* _PATH_DEV is expected to be a string literal so that it can be
       joined with "r" at compile time */
    static const char rawPrefix[] = _PATH_DEV "r";
    const size_t    devPathLength = sizeof( rawPrefix ) - 1;
    io_object_t     nextMedia;
    kern_return_t   kernResult = KERN_FAILURE;

    /* no room even for the terminator: nothing can be stored */
    if ( maxPathSize <= 0 )
        return kernResult;

    *bsdPath = '\0';
    nextMedia = IOIteratorNext( mediaIterator );
    if ( nextMedia )
    {
        CFTypeRef   bsdPathAsCFString;
        bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
        if ( bsdPathAsCFString ) {
            /* the prefix is copied only if it fits with room to spare for
               at least the terminator of the name that follows */
            if ( (size_t) maxPathSize > devPathLength ) {
                memcpy( bsdPath, rawPrefix, devPathLength + 1 );
                if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
                    kernResult = KERN_SUCCESS;
                } else {
                    /* do not leave the bare prefix behind */
                    *bsdPath = '\0';
                }
            }
            CFRelease( bsdPathAsCFString );
        }
        IOObjectRelease( nextMedia );
    }

    return kernResult;
}
954

    
955
#endif
956

    
957
/*
 * Open a host device and initialise the driver state in bs->opaque.
 *
 * flags is a combination of BDRV_O_* values, interpreted as in raw_open
 * (access mode, BDRV_O_NOCACHE -> O_DIRECT, no BDRV_O_CACHE_WB ->
 * O_DSYNC).
 *
 * Under CONFIG_COCOA a filename starting with "/dev/cdrom" is replaced by
 * the raw BSD path of the first ejectable CD found.  On Linux, names
 * starting with "/dev/cd" and "/dev/fd" are opened non-blocking and typed
 * FTYPE_CD / FTYPE_FD, and "/dev/sg" sets bs->sg; a floppy is closed
 * again right away so that it can be reopened on demand.
 *
 * Returns 0 on success or a negative errno value.
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret, i;
#ifdef CONFIG_COCOA
    /* filename may be redirected to this buffer, so it has to stay alive
       until the open() call further down */
    char bsdPath[ MAXPATHLEN ];
#endif

    /* Without the AIO state the asynchronous paths cannot work, so a
       failed initialisation fails the open instead of being ignored. */
    ret = posix_aio_init();
    if (ret < 0)
        return ret;

#ifdef CONFIG_COCOA
    if (strstart(filename, "/dev/cdrom", NULL)) {
        kern_return_t kernResult;
        io_iterator_t mediaIterator;
        int probe_fd;

        kernResult = FindEjectableCDMedia( &mediaIterator );
        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );

        /* the two-character suffix must still fit into the buffer */
        if ( bsdPath[ 0 ] != '\0' &&
             strlen( bsdPath ) + 3 <= sizeof( bsdPath ) ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            probe_fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (probe_fd < 0) {
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                close(probe_fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator )
            IOObjectRelease( mediaIterator );
    }
#endif
    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
     * and O_DIRECT for no caching. */
    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
    else if (!(flags & BDRV_O_CACHE_WB))
        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;
#if defined(__linux__)
    if (strstart(filename, "/dev/cd", NULL)) {
        /* open will not fail even if no CD is inserted */
        open_flags |= O_NONBLOCK;
        s->type = FTYPE_CD;
    } else if (strstart(filename, "/dev/fd", NULL)) {
        s->type = FTYPE_FD;
        /* saved without O_NONBLOCK, which is added only for this open */
        s->fd_open_flags = open_flags;
        /* open will not fail even if no floppy is inserted */
        open_flags |= O_NONBLOCK;
    } else if (strstart(filename, "/dev/sg", NULL)) {
        bs->sg = 1;
    }
#endif
    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        /* a read-only filesystem is reported as a permission problem */
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
    for (i = 0; i < RAW_FD_POOL_SIZE; i++)
        s->fd_pool[i] = -1;
#if defined(__linux__)
    /* close fd so that we can reopen it as needed */
    if (s->type == FTYPE_FD) {
        close(s->fd);
        s->fd = -1;
        s->fd_media_changed = 1;
    }
#endif
    return 0;
}
1039

    
1040
#if defined(__linux__)
1041
/* Note: we do not have a reliable method to detect if the floppy is
1042
   present. The current method is to try to open the floppy at every
1043
   I/O and to keep it opened during a few hundreds of ms. */
1044
static int fd_open(BlockDriverState *bs)
1045
{
1046
    BDRVRawState *s = bs->opaque;
1047
    int last_media_present;
1048

    
1049
    if (s->type != FTYPE_FD)
1050
        return 0;
1051
    last_media_present = (s->fd >= 0);
1052
    if (s->fd >= 0 &&
1053
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1054
        close(s->fd);
1055
        s->fd = -1;
1056
        raw_close_fd_pool(s);
1057
#ifdef DEBUG_FLOPPY
1058
        printf("Floppy closed\n");
1059
#endif
1060
    }
1061
    if (s->fd < 0) {
1062
        if (s->fd_got_error &&
1063
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1064
#ifdef DEBUG_FLOPPY
1065
            printf("No floppy (open delayed)\n");
1066
#endif
1067
            return -EIO;
1068
        }
1069
        s->fd = open(bs->filename, s->fd_open_flags);
1070
        if (s->fd < 0) {
1071
            s->fd_error_time = qemu_get_clock(rt_clock);
1072
            s->fd_got_error = 1;
1073
            if (last_media_present)
1074
                s->fd_media_changed = 1;
1075
#ifdef DEBUG_FLOPPY
1076
            printf("No floppy\n");
1077
#endif
1078
            return -EIO;
1079
        }
1080
#ifdef DEBUG_FLOPPY
1081
        printf("Floppy opened\n");
1082
#endif
1083
    }
1084
    if (!last_media_present)
1085
        s->fd_media_changed = 1;
1086
    s->fd_open_time = qemu_get_clock(rt_clock);
1087
    s->fd_got_error = 0;
1088
    return 0;
1089
}
1090

    
1091
/*
 * Report whether a medium is present.
 *
 * FTYPE_CD: 1 when the CDROM_DRIVE_STATUS ioctl answers CDS_DISC_OK,
 *           0 otherwise.
 * FTYPE_FD: 1 when fd_open() succeeds, 0 otherwise.
 * Any other type is always reported as inserted (1).
 */
static int raw_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_CD) {
        int status = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);

        return status == CDS_DISC_OK;
    }

    if (s->type == FTYPE_FD) {
        int rc = fd_open(bs);

        return rc >= 0;
    }

    return 1;
}
1111

    
1112
/* currently only used by fdc.c, but a CD version would be good too */
1113
static int raw_media_changed(BlockDriverState *bs)
1114
{
1115
    BDRVRawState *s = bs->opaque;
1116

    
1117
    switch(s->type) {
1118
    case FTYPE_FD:
1119
        {
1120
            int ret;
1121
            /* XXX: we do not have a true media changed indication. It
1122
               does not work if the floppy is changed without trying
1123
               to read it */
1124
            fd_open(bs);
1125
            ret = s->fd_media_changed;
1126
            s->fd_media_changed = 0;
1127
#ifdef DEBUG_FLOPPY
1128
            printf("Floppy changed=%d\n", ret);
1129
#endif
1130
            return ret;
1131
        }
1132
    default:
1133
        return -ENOTSUP;
1134
    }
1135
}
1136

    
1137
/*
 * Eject the medium or close the tray.
 *
 * FTYPE_CD: issues CDROMEJECT when eject_flag is non-zero, CDROMCLOSETRAY
 *           otherwise.
 * FTYPE_FD: closes any open descriptor (and the fd pool), then reopens the
 *           device with O_NONBLOCK just to issue FDEJECT; eject_flag is not
 *           consulted on this path.
 *
 * ioctl failures are only reported through perror(); the function still
 * returns 0.  Returns -ENOTSUP for any other type.
 */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    BDRVRawState *s = bs->opaque;

    switch(s->type) {
    case FTYPE_CD:
        if (eject_flag) {
            if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
                perror("CDROMEJECT");
        } else {
            /* label the diagnostic with the request that actually failed */
            if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
                perror("CDROMCLOSETRAY");
        }
        break;
    case FTYPE_FD:
        {
            int fd;
            if (s->fd >= 0) {
                close(s->fd);
                s->fd = -1;
                raw_close_fd_pool(s);
            }
            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
            if (fd >= 0) {
                if (ioctl(fd, FDEJECT, 0) < 0)
                    perror("FDEJECT");
                close(fd);
            }
        }
        break;
    default:
        return -ENOTSUP;
    }
    return 0;
}
1172

    
1173
/*
 * Lock or unlock the CD-ROM door.
 *
 * Issues CDROM_LOCKDOOR with `locked` as its argument for FTYPE_CD and
 * returns 0 regardless of the ioctl outcome.  Returns -ENOTSUP for every
 * other type.
 */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    BDRVRawState *s = bs->opaque;

    if (s->type != FTYPE_CD) {
        return -ENOTSUP;
    }

    /* Note: an error can happen if the distribution automatically
       mounts the CD-ROM, so a failure is deliberately not reported. */
    (void) ioctl (s->fd, CDROM_LOCKDOOR, locked);

    return 0;
}
1190

    
1191
/*
 * Forward an ioctl request to the underlying descriptor.
 *
 * The value returned by ioctl() is handed back unchanged; it is not
 * converted into a negative errno value.
 */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *s = bs->opaque;
    int rc;

    rc = ioctl(s->fd, req, buf);
    return rc;
}
1197
#else
1198

    
1199
/* Non-Linux build: there is no floppy reopen logic, always report success. */
static int fd_open(BlockDriverState *bs)
{
    (void) bs;
    return 0;
}
1203

    
1204
/* Non-Linux build: no media detection, the medium is always reported present. */
static int raw_is_inserted(BlockDriverState *bs)
{
    (void) bs;
    return 1;
}
1208

    
1209
/* Non-Linux build: media change detection is not supported. */
static int raw_media_changed(BlockDriverState *bs)
{
    (void) bs;
    return -ENOTSUP;
}
1213

    
1214
/* Non-Linux build: ejecting is not supported. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    (void) bs;
    (void) eject_flag;
    return -ENOTSUP;
}
1218

    
1219
/* Non-Linux build: door locking is not supported. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    (void) bs;
    (void) locked;
    return -ENOTSUP;
}
1223

    
1224
/* Non-Linux build: ioctl passthrough is not supported. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    (void) bs;
    (void) req;
    (void) buf;
    return -ENOTSUP;
}
1228
#endif /* !linux */
1229

    
1230
BlockDriver bdrv_host_device = {
1231
    "host_device",
1232
    sizeof(BDRVRawState),
1233
    NULL, /* no probe for protocols */
1234
    hdev_open,
1235
    NULL,
1236
    NULL,
1237
    raw_close,
1238
    NULL,
1239
    raw_flush,
1240

    
1241
#ifdef CONFIG_AIO
1242
    .bdrv_aio_read = raw_aio_read,
1243
    .bdrv_aio_write = raw_aio_write,
1244
    .bdrv_aio_cancel = raw_aio_cancel,
1245
    .aiocb_size = sizeof(RawAIOCB),
1246
#endif
1247
    .bdrv_pread = raw_pread,
1248
    .bdrv_pwrite = raw_pwrite,
1249
    .bdrv_getlength = raw_getlength,
1250

    
1251
    /* removable device support */
1252
    .bdrv_is_inserted = raw_is_inserted,
1253
    .bdrv_media_changed = raw_media_changed,
1254
    .bdrv_eject = raw_eject,
1255
    .bdrv_set_locked = raw_set_locked,
1256
    /* generic scsi device */
1257
    .bdrv_ioctl = raw_ioctl,
1258
};