Statistics
| Branch: | Revision:

root / block-raw-posix.c @ 6f382b5e

History | View | Annotate | Download (31.2 kB)

1
/*
2
 * Block driver for RAW files (posix)
3
 *
4
 * Copyright (c) 2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24
#include "qemu-common.h"
25
#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
26
#include "qemu-timer.h"
27
#include "exec-all.h"
28
#endif
29
#include "block_int.h"
30
#include <assert.h>
31
#ifdef CONFIG_AIO
32
#include <aio.h>
33
#endif
34

    
35
#ifdef CONFIG_COCOA
36
#include <paths.h>
37
#include <sys/param.h>
38
#include <IOKit/IOKitLib.h>
39
#include <IOKit/IOBSD.h>
40
#include <IOKit/storage/IOMediaBSDClient.h>
41
#include <IOKit/storage/IOMedia.h>
42
#include <IOKit/storage/IOCDMedia.h>
43
//#include <IOKit/storage/IOCDTypes.h>
44
#include <CoreFoundation/CoreFoundation.h>
45
#endif
46

    
47
#ifdef __sun__
48
#define _POSIX_PTHREAD_SEMANTICS 1
49
#include <signal.h>
50
#include <sys/dkio.h>
51
#endif
52
#ifdef __linux__
53
#include <sys/ioctl.h>
54
#include <linux/cdrom.h>
55
#include <linux/fd.h>
56
#endif
57
#ifdef __FreeBSD__
58
#include <sys/disk.h>
59
#endif
60

    
61
#ifdef __OpenBSD__
62
#include <sys/ioctl.h>
63
#include <sys/disklabel.h>
64
#include <sys/dkio.h>
65
#endif
66

    
67
//#define DEBUG_FLOPPY
68

    
69
//#define DEBUG_BLOCK
70
#if defined(DEBUG_BLOCK) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
71
#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0)        \
72
    { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
73
#else
74
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
75
#endif
76

    
77
#define FTYPE_FILE   0
78
#define FTYPE_CD     1
79
#define FTYPE_FD     2
80

    
81
#define ALIGNED_BUFFER_SIZE (32 * 512)
82

    
83
/* if the FD is not accessed during that time (in ms), we try to
84
   reopen it to see if the disk has been changed */
85
#define FD_OPEN_TIMEOUT 1000
86

    
87
/* Per-image private state of the raw POSIX driver (stored in bs->opaque). */
typedef struct BDRVRawState {
88
    int fd; /* host file descriptor; set to -1 by raw_close() and while a floppy is closed */
89
    int type; /* one of FTYPE_FILE, FTYPE_CD, FTYPE_FD */
90
    unsigned int lseek_err_cnt; /* bumped on each failed lseek(), reset to 0 on success */
91
#if defined(__linux__)
92
    /* linux floppy specific */
93
    int fd_open_flags; /* open() flags saved by hdev_open() and reused by fd_open() */
94
    int64_t fd_open_time; /* rt_clock value of the last successful fd_open() */
95
    int64_t fd_error_time; /* rt_clock value of the last failed reopen */
96
    int fd_got_error; /* non-zero after a failed reopen, cleared on success */
97
    int fd_media_changed; /* set when the floppy went from absent to present or back */
98
#endif
99
#if defined(O_DIRECT) && !defined(QEMU_IMG)
100
    uint8_t* aligned_buf; /* ALIGNED_BUFFER_SIZE bounce buffer set up by raw_open() for BDRV_O_DIRECT, else NULL */
101
#endif
102
} BDRVRawState;
103

    
104
static int fd_open(BlockDriverState *bs);
105

    
106
/*
 * Open a regular file as a raw image.
 *
 * flags is a combination of BDRV_O_* bits: the access bits select
 * read-write vs. read-only (read-only also sets bs->read_only),
 * BDRV_O_CREAT creates/truncates the file and BDRV_O_DIRECT requests
 * O_DIRECT where the host defines it.
 *
 * Returns 0 on success or a negative errno value; EROFS is reported
 * as -EACCES.
 */
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int open_flags = O_BINARY;
    int fd;

    s->lseek_err_cnt = 0;

    if ((flags & BDRV_O_ACCESS) != O_RDWR) {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    } else {
        open_flags |= O_RDWR;
    }
    if (flags & BDRV_O_CREAT) {
        open_flags |= O_CREAT | O_TRUNC;
    }
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT) {
        open_flags |= O_DIRECT;
    }
#endif

    s->type = FTYPE_FILE;

    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        /* a read-only filesystem is reported as a permission error */
        return (errno == EROFS) ? -EACCES : -errno;
    }
    s->fd = fd;
#if defined(O_DIRECT) && !defined(QEMU_IMG)
    s->aligned_buf = NULL;
    if (flags & BDRV_O_DIRECT) {
        /* bounce buffer used by raw_pread()/raw_pwrite() for unaligned I/O */
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            int err = -errno;

            close(fd);
            return err;
        }
    }
#endif
    return 0;
}
150

    
151
/* XXX: use host sector size if necessary with:
152
#ifdef DIOCGSECTORSIZE
153
        {
154
            unsigned int sectorsize = 512;
155
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
156
                sectorsize > bufsize)
157
                bufsize = sectorsize;
158
        }
159
#endif
160
#ifdef CONFIG_COCOA
161
        u_int32_t   blockSize = 512;
162
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
163
            bufsize = blockSize;
164
        }
165
#endif
166
*/
167

    
168
/*
169
 * offset and count are in bytes, but must be multiples of 512 for files
170
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
171
 *
172
 * This function may be called without alignment if the caller ensures
173
 * that O_DIRECT is not in effect.
174
 */
175
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
176
                     uint8_t *buf, int count)
177
{
178
    BDRVRawState *s = bs->opaque;
179
    int ret;
180

    
181
    ret = fd_open(bs);
182
    if (ret < 0)
183
        return ret;
184

    
185
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
186
        ++(s->lseek_err_cnt);
187
        if(s->lseek_err_cnt <= 10) {
188
            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
189
                              "] lseek failed : %d = %s\n",
190
                              s->fd, bs->filename, offset, buf, count,
191
                              bs->total_sectors, errno, strerror(errno));
192
        }
193
        return -1;
194
    }
195
    s->lseek_err_cnt=0;
196

    
197
    ret = read(s->fd, buf, count);
198
    if (ret == count)
199
        goto label__raw_read__success;
200

    
201
    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
202
                      "] read failed %d : %d = %s\n",
203
                      s->fd, bs->filename, offset, buf, count,
204
                      bs->total_sectors, ret, errno, strerror(errno));
205

    
206
    /* Try harder for CDrom. */
207
    if (bs->type == BDRV_TYPE_CDROM) {
208
        lseek(s->fd, offset, SEEK_SET);
209
        ret = read(s->fd, buf, count);
210
        if (ret == count)
211
            goto label__raw_read__success;
212
        lseek(s->fd, offset, SEEK_SET);
213
        ret = read(s->fd, buf, count);
214
        if (ret == count)
215
            goto label__raw_read__success;
216

    
217
        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
218
                          "] retry read failed %d : %d = %s\n",
219
                          s->fd, bs->filename, offset, buf, count,
220
                          bs->total_sectors, ret, errno, strerror(errno));
221
    }
222

    
223
label__raw_read__success:
224

    
225
    return ret;
226
}
227

    
228
/*
229
 * offset and count are in bytes, but must be multiples of 512 for files
230
 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
231
 *
232
 * This function may be called without alignment if the caller ensures
233
 * that O_DIRECT is not in effect.
234
 */
235
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
236
                      const uint8_t *buf, int count)
237
{
238
    BDRVRawState *s = bs->opaque;
239
    int ret;
240

    
241
    ret = fd_open(bs);
242
    if (ret < 0)
243
        return ret;
244

    
245
    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
246
        ++(s->lseek_err_cnt);
247
        if(s->lseek_err_cnt) {
248
            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
249
                              PRId64 "] lseek failed : %d = %s\n",
250
                              s->fd, bs->filename, offset, buf, count,
251
                              bs->total_sectors, errno, strerror(errno));
252
        }
253
        return -1;
254
    }
255
    s->lseek_err_cnt = 0;
256

    
257
    ret = write(s->fd, buf, count);
258
    if (ret == count)
259
        goto label__raw_write__success;
260

    
261
    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
262
                      "] write failed %d : %d = %s\n",
263
                      s->fd, bs->filename, offset, buf, count,
264
                      bs->total_sectors, ret, errno, strerror(errno));
265

    
266
label__raw_write__success:
267

    
268
    return ret;
269
}
270

    
271

    
272
#if defined(O_DIRECT) && !defined(QEMU_IMG)
273
/*
274
 * offset and count are in bytes and possibly not aligned. For files opened
275
 * with O_DIRECT, necessary alignments are ensured before calling
276
 * raw_pread_aligned to do the actual read.
277
 */
278
static int raw_pread(BlockDriverState *bs, int64_t offset,
279
                     uint8_t *buf, int count)
280
{
281
    BDRVRawState *s = bs->opaque;
282
    int size, ret, shift, sum;
283

    
284
    sum = 0;
285

    
286
    if (s->aligned_buf != NULL)  {
287

    
288
        if (offset & 0x1ff) {
289
            /* align offset on a 512 bytes boundary */
290

    
291
            shift = offset & 0x1ff;
292
            size = (shift + count + 0x1ff) & ~0x1ff;
293
            if (size > ALIGNED_BUFFER_SIZE)
294
                size = ALIGNED_BUFFER_SIZE;
295
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
296
            if (ret < 0)
297
                return ret;
298

    
299
            size = 512 - shift;
300
            if (size > count)
301
                size = count;
302
            memcpy(buf, s->aligned_buf + shift, size);
303

    
304
            buf += size;
305
            offset += size;
306
            count -= size;
307
            sum += size;
308

    
309
            if (count == 0)
310
                return sum;
311
        }
312
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
313

    
314
            /* read on aligned buffer */
315

    
316
            while (count) {
317

    
318
                size = (count + 0x1ff) & ~0x1ff;
319
                if (size > ALIGNED_BUFFER_SIZE)
320
                    size = ALIGNED_BUFFER_SIZE;
321

    
322
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
323
                if (ret < 0)
324
                    return ret;
325

    
326
                size = ret;
327
                if (size > count)
328
                    size = count;
329

    
330
                memcpy(buf, s->aligned_buf, size);
331

    
332
                buf += size;
333
                offset += size;
334
                count -= size;
335
                sum += size;
336
            }
337

    
338
            return sum;
339
        }
340
    }
341

    
342
    return raw_pread_aligned(bs, offset, buf, count) + sum;
343
}
344

    
345
/*
346
 * offset and count are in bytes and possibly not aligned. For files opened
347
 * with O_DIRECT, necessary alignments are ensured before calling
348
 * raw_pwrite_aligned to do the actual write.
349
 */
350
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
351
                      const uint8_t *buf, int count)
352
{
353
    BDRVRawState *s = bs->opaque;
354
    int size, ret, shift, sum;
355

    
356
    sum = 0;
357

    
358
    if (s->aligned_buf != NULL) {
359

    
360
        if (offset & 0x1ff) {
361
            /* align offset on a 512 bytes boundary */
362
            shift = offset & 0x1ff;
363
            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
364
            if (ret < 0)
365
                return ret;
366

    
367
            size = 512 - shift;
368
            if (size > count)
369
                size = count;
370
            memcpy(s->aligned_buf + shift, buf, size);
371

    
372
            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
373
            if (ret < 0)
374
                return ret;
375

    
376
            buf += size;
377
            offset += size;
378
            count -= size;
379
            sum += size;
380

    
381
            if (count == 0)
382
                return sum;
383
        }
384
        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
385

    
386
            while ((size = (count & ~0x1ff)) != 0) {
387

    
388
                if (size > ALIGNED_BUFFER_SIZE)
389
                    size = ALIGNED_BUFFER_SIZE;
390

    
391
                memcpy(s->aligned_buf, buf, size);
392

    
393
                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
394
                if (ret < 0)
395
                    return ret;
396

    
397
                buf += ret;
398
                offset += ret;
399
                count -= ret;
400
                sum += ret;
401
            }
402
            /* here, count < 512 because (count & ~0x1ff) == 0 */
403
            if (count) {
404
                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
405
                if (ret < 0)
406
                    return ret;
407
                 memcpy(s->aligned_buf, buf, count);
408

    
409
                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
410
                 if (ret < 0)
411
                     return ret;
412
                 if (count < ret)
413
                     ret = count;
414

    
415
                 sum += ret;
416
            }
417
            return sum;
418
        }
419
    }
420
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
421
}
422

    
423
#else
424
#define raw_pread raw_pread_aligned
425
#define raw_pwrite raw_pwrite_aligned
426
#endif
427

    
428

    
429
#ifdef CONFIG_AIO
430
/***********************************************************/
431
/* Unix AIO using POSIX AIO */
432

    
433
/* One POSIX AIO request; pending requests are chained from first_aio. */
typedef struct RawAIOCB {
434
    BlockDriverAIOCB common; /* first member: raw_aio_cancel() casts BlockDriverAIOCB* to RawAIOCB* */
435
    struct aiocb aiocb; /* POSIX AIO control block, filled in by raw_aio_setup() */
436
    struct RawAIOCB *next; /* next request in the first_aio list */
437
    int ret; /* result handed to the callback by raw_aio_em_cb() (synchronous fallback) */
438
} RawAIOCB;
439

    
440
static int aio_sig_num = SIGUSR2;
441
static RawAIOCB *first_aio; /* AIO issued */
442
static int aio_initialized = 0;
443

    
444
static void aio_signal_handler(int signum)
445
{
446
#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
447
    CPUState *env = cpu_single_env;
448
    if (env) {
449
        /* stop the currently executing cpu because a timer occured */
450
        cpu_interrupt(env, CPU_INTERRUPT_EXIT);
451
#ifdef USE_KQEMU
452
        if (env->kqemu_enabled) {
453
            kqemu_cpu_interrupt(env);
454
        }
455
#endif
456
    }
457
#endif
458
}
459

    
460
void qemu_aio_init(void)
461
{
462
    struct sigaction act;
463

    
464
    aio_initialized = 1;
465

    
466
    sigfillset(&act.sa_mask);
467
    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
468
    act.sa_handler = aio_signal_handler;
469
    sigaction(aio_sig_num, &act, NULL);
470

    
471
#if defined(__GLIBC__) && defined(__linux__)
472
    {
473
        /* XXX: aio thread exit seems to hang on RedHat 9 and this init
474
           seems to fix the problem. */
475
        struct aioinit ai;
476
        memset(&ai, 0, sizeof(ai));
477
        ai.aio_threads = 1;
478
        ai.aio_num = 1;
479
        ai.aio_idle_time = 365 * 100000;
480
        aio_init(&ai);
481
    }
482
#endif
483
}
484

    
485
void qemu_aio_poll(void)
486
{
487
    RawAIOCB *acb, **pacb;
488
    int ret;
489

    
490
    for(;;) {
491
        pacb = &first_aio;
492
        for(;;) {
493
            acb = *pacb;
494
            if (!acb)
495
                goto the_end;
496
            ret = aio_error(&acb->aiocb);
497
            if (ret == ECANCELED) {
498
                /* remove the request */
499
                *pacb = acb->next;
500
                qemu_aio_release(acb);
501
            } else if (ret != EINPROGRESS) {
502
                /* end of aio */
503
                if (ret == 0) {
504
                    ret = aio_return(&acb->aiocb);
505
                    if (ret == acb->aiocb.aio_nbytes)
506
                        ret = 0;
507
                    else
508
                        ret = -EINVAL;
509
                } else {
510
                    ret = -ret;
511
                }
512
                /* remove the request */
513
                *pacb = acb->next;
514
                /* call the callback */
515
                acb->common.cb(acb->common.opaque, ret);
516
                qemu_aio_release(acb);
517
                break;
518
            } else {
519
                pacb = &acb->next;
520
            }
521
        }
522
    }
523
 the_end: ;
524
}
525

    
526
/* Wait for all IO requests to complete.  */
527
void qemu_aio_flush(void)
528
{
529
    qemu_aio_wait_start();
530
    qemu_aio_poll();
531
    while (first_aio) {
532
        qemu_aio_wait();
533
    }
534
    qemu_aio_wait_end();
535
}
536

    
537
/* wait until at least one AIO was handled */
538
static sigset_t wait_oset;
539

    
540
void qemu_aio_wait_start(void)
541
{
542
    sigset_t set;
543

    
544
    if (!aio_initialized)
545
        qemu_aio_init();
546
    sigemptyset(&set);
547
    sigaddset(&set, aio_sig_num);
548
    sigprocmask(SIG_BLOCK, &set, &wait_oset);
549
}
550

    
551
void qemu_aio_wait(void)
552
{
553
    sigset_t set;
554
    int nb_sigs;
555

    
556
#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
557
    if (qemu_bh_poll())
558
        return;
559
#endif
560
    sigemptyset(&set);
561
    sigaddset(&set, aio_sig_num);
562
    sigwait(&set, &nb_sigs);
563
    qemu_aio_poll();
564
}
565

    
566
void qemu_aio_wait_end(void)
567
{
568
    sigprocmask(SIG_SETMASK, &wait_oset, NULL);
569
}
570

    
571
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
572
        int64_t sector_num, uint8_t *buf, int nb_sectors,
573
        BlockDriverCompletionFunc *cb, void *opaque)
574
{
575
    BDRVRawState *s = bs->opaque;
576
    RawAIOCB *acb;
577

    
578
    if (fd_open(bs) < 0)
579
        return NULL;
580

    
581
    acb = qemu_aio_get(bs, cb, opaque);
582
    if (!acb)
583
        return NULL;
584
    acb->aiocb.aio_fildes = s->fd;
585
    acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
586
    acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
587
    acb->aiocb.aio_buf = buf;
588
    if (nb_sectors < 0)
589
        acb->aiocb.aio_nbytes = -nb_sectors;
590
    else
591
        acb->aiocb.aio_nbytes = nb_sectors * 512;
592
    acb->aiocb.aio_offset = sector_num * 512;
593
    acb->next = first_aio;
594
    first_aio = acb;
595
    return acb;
596
}
597

    
598
#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
599
static void raw_aio_em_cb(void* opaque)
600
{
601
    RawAIOCB *acb = opaque;
602
    acb->common.cb(acb->common.opaque, acb->ret);
603
    qemu_aio_release(acb);
604
}
605
#endif
606

    
607
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
608
        int64_t sector_num, uint8_t *buf, int nb_sectors,
609
        BlockDriverCompletionFunc *cb, void *opaque)
610
{
611
    RawAIOCB *acb;
612

    
613
    /*
614
     * If O_DIRECT is used and the buffer is not aligned fall back
615
     * to synchronous IO.
616
     */
617
#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
618
    BDRVRawState *s = bs->opaque;
619

    
620
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
621
        QEMUBH *bh;
622
        acb = qemu_aio_get(bs, cb, opaque);
623
        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
624
        bh = qemu_bh_new(raw_aio_em_cb, acb);
625
        qemu_bh_schedule(bh);
626
        return &acb->common;
627
    }
628
#endif
629

    
630
    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
631
    if (!acb)
632
        return NULL;
633
    if (aio_read(&acb->aiocb) < 0) {
634
        qemu_aio_release(acb);
635
        return NULL;
636
    }
637
    return &acb->common;
638
}
639

    
640
static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
641
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
642
        BlockDriverCompletionFunc *cb, void *opaque)
643
{
644
    RawAIOCB *acb;
645

    
646
    /*
647
     * If O_DIRECT is used and the buffer is not aligned fall back
648
     * to synchronous IO.
649
     */
650
#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
651
    BDRVRawState *s = bs->opaque;
652

    
653
    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
654
        QEMUBH *bh;
655
        acb = qemu_aio_get(bs, cb, opaque);
656
        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
657
        bh = qemu_bh_new(raw_aio_em_cb, acb);
658
        qemu_bh_schedule(bh);
659
        return &acb->common;
660
    }
661
#endif
662

    
663
    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
664
    if (!acb)
665
        return NULL;
666
    if (aio_write(&acb->aiocb) < 0) {
667
        qemu_aio_release(acb);
668
        return NULL;
669
    }
670
    return &acb->common;
671
}
672

    
673
/*
 * Cancel an AIO request and remove it from the first_aio list.
 *
 * If the request cannot be cancelled, busy-waits until it is no longer
 * in progress.  The request is released if it is found in the list.
 */
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
{
    int ret;
    RawAIOCB *acb = (RawAIOCB *)blockacb;
    RawAIOCB **pacb;

    ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
    if (ret == AIO_NOTCANCELED) {
        /* fail safe: if the aio could not be canceled, we wait for
           it */
        while (aio_error(&acb->aiocb) == EINPROGRESS);
    }

    /*
     * Remove the callback from the queue.  The walk must advance from
     * the node currently examined (*pacb); stepping from acb itself
     * either spins forever or misses the node whenever acb is not the
     * list head.
     */
    for (pacb = &first_aio; *pacb != NULL; pacb = &(*pacb)->next) {
        if (*pacb == acb) {
            *pacb = acb->next;
            qemu_aio_release(acb);
            break;
        }
    }
}
699

    
700
# else /* CONFIG_AIO */
701

    
702
/*
 * Build without CONFIG_AIO: the qemu_aio_* entry points exist but do
 * nothing, except that qemu_aio_wait() still runs pending bottom halves
 * in the emulator build.
 */

/* Nothing to initialize without AIO. */
void qemu_aio_init(void)
{
    return;
}

/* No requests can be pending. */
void qemu_aio_poll(void)
{
    return;
}

/* Nothing to flush. */
void qemu_aio_flush(void)
{
    return;
}

/* No signal mask handling needed. */
void qemu_aio_wait_start(void)
{
    return;
}

/* Only polls bottom halves (emulator build). */
void qemu_aio_wait(void)
{
#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
    qemu_bh_poll();
#endif
    return;
}

/* No signal mask to restore. */
void qemu_aio_wait_end(void)
{
    return;
}
728

    
729
#endif /* CONFIG_AIO */
730

    
731
/*
 * Close the image: closes the file descriptor if one is open, marks it
 * closed (-1) and frees the O_DIRECT bounce buffer.
 */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
#if defined(O_DIRECT) && !defined(QEMU_IMG)
        if (s->aligned_buf != NULL) {
            qemu_free(s->aligned_buf);
            /* do not leave a dangling pointer behind in the state */
            s->aligned_buf = NULL;
        }
#endif
    }
}
743

    
744
/*
 * Resize the image to offset bytes.
 *
 * Only FTYPE_FILE images are supported; other types return -ENOTSUP.
 * Returns 0 on success or -errno if ftruncate() fails.
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_FILE) {
        return (ftruncate(s->fd, offset) < 0) ? -errno : 0;
    }
    return -ENOTSUP;
}
753

    
754
#ifdef __OpenBSD__
755
/*
 * OpenBSD: return the image length in bytes.
 *
 * For character and block devices the size is taken from the disklabel
 * (sector size times the size of the partition selected by the device
 * number); for anything else st_size is used.  Returns -1 if fstat()
 * or the DIOCGDINFO ioctl fails.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    struct disklabel label;
    struct stat info;

    if (fstat(s->fd, &info) != 0) {
        return -1;
    }
    if (!S_ISCHR(info.st_mode) && !S_ISBLK(info.st_mode)) {
        return info.st_size;
    }
    if (ioctl(s->fd, DIOCGDINFO, &label) != 0) {
        return -1;
    }
    return (uint64_t)label.d_secsize *
        label.d_partitions[DISKPART(info.st_rdev)].p_size;
}
773
#else /* !__OpenBSD__ */
774
/*
 * Return the image length in bytes.
 *
 * Depending on the host, the size comes from a media-size ioctl
 * (DIOCGMEDIASIZE on BSD character devices, DKIOCGMEDIAINFO on Solaris)
 * or from seeking to the end of the file.  Returns the negative result
 * of fd_open() if the descriptor cannot be (re)opened; the lseek()
 * fallback returns -1 on failure.
 */
static int64_t  raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd;
    int64_t size;
#ifdef _BSD
    struct stat sb;
#endif
#ifdef __sun__
    struct dk_minfo minfo;
    int rv;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

    /*
     * Fetch the descriptor only after fd_open(): for floppies fd_open()
     * may close and reopen s->fd, so a copy taken earlier can be -1 or
     * stale.
     */
    fd = s->fd;

#ifdef _BSD
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#endif
#ifdef CONFIG_COCOA
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
    } else
#endif
#ifdef __sun__
    /*
     * use the DKIOCGMEDIAINFO ioctl to read the size.
     */
    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
    if ( rv != -1 ) {
        size = minfo.dki_lbsize * minfo.dki_capacity;
    } else /* there are reports that lseek on some devices
              fails, but irc discussion said that contingency
              on contingency was overkill */
#endif
    {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
820
#endif
821

    
822
static int raw_create(const char *filename, int64_t total_size,
823
                      const char *backing_file, int flags)
824
{
825
    int fd;
826

    
827
    if (flags || backing_file)
828
        return -ENOTSUP;
829

    
830
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
831
              0644);
832
    if (fd < 0)
833
        return -EIO;
834
    ftruncate(fd, total_size * 512);
835
    close(fd);
836
    return 0;
837
}
838

    
839
/* Flush the image's file descriptor with fsync(); the result is not reported. */
static void raw_flush(BlockDriverState *bs)
{
    const BDRVRawState *state = bs->opaque;

    fsync(state->fd);
}
844

    
845
/*
 * Driver table for raw images on regular files ("file" protocol).
 * The leading entries are positional; their slot meanings come from the
 * BlockDriver declaration, which is not visible in this file
 * (presumably block_int.h -- TODO confirm).
 */
BlockDriver bdrv_raw = {
846
    "raw",
847
    sizeof(BDRVRawState), /* size of the per-image state kept in bs->opaque */
848
    NULL, /* no probe for protocols */
849
    raw_open,
850
    NULL, /* unused slot; reads go through .bdrv_pread below (presumably the sector read hook -- confirm) */
851
    NULL, /* unused slot; writes go through .bdrv_pwrite below (presumably the sector write hook -- confirm) */
852
    raw_close,
853
    raw_create,
854
    raw_flush,
855

    
856
#ifdef CONFIG_AIO
857
    /* asynchronous I/O entry points, only with POSIX AIO support */
    .bdrv_aio_read = raw_aio_read,
858
    .bdrv_aio_write = raw_aio_write,
859
    .bdrv_aio_cancel = raw_aio_cancel,
860
    .aiocb_size = sizeof(RawAIOCB),
861
#endif
862
    .protocol_name = "file",
863
    /* byte-granular I/O; these are the *_aligned variants when O_DIRECT handling is compiled out */
    .bdrv_pread = raw_pread,
864
    .bdrv_pwrite = raw_pwrite,
865
    .bdrv_truncate = raw_truncate,
866
    .bdrv_getlength = raw_getlength,
867
};
868

    
869
/***********************************************/
870
/* host device */
871

    
872
#ifdef CONFIG_COCOA
873
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
874
static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
875

    
876
/*
 * Build an iterator over IOKit services of class kIOCDMediaClass whose
 * kIOMediaEjectableKey property is true.
 *
 * Failures of the individual IOKit calls are reported with printf() but
 * do not stop the function.  Returns the result of
 * IOServiceGetMatchingServices(); the iterator is stored in
 * *mediaIterator.
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    mach_port_t             ioPort;
    CFMutableDictionaryRef  matchDict;
    kern_return_t           status;

    status = IOMasterPort( MACH_PORT_NULL, &ioPort );
    if ( status != KERN_SUCCESS ) {
        printf( "IOMasterPort returned %d\n", status );
    }

    matchDict = IOServiceMatching( kIOCDMediaClass );
    if ( matchDict != NULL ) {
        /* restrict the match to ejectable media */
        CFDictionarySetValue( matchDict, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
    } else {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
    }

    status = IOServiceGetMatchingServices( ioPort, matchDict, mediaIterator );
    if ( status != KERN_SUCCESS ) {
        printf( "IOServiceGetMatchingServices returned %d\n", status );
    }

    return status;
}
901

    
902
/*
 * Store in bsdPath the raw device path (_PATH_DEV "r" <BSD name>) of
 * the next media object of mediaIterator.
 *
 * bsdPath must have room for maxPathSize bytes (at least 1); it is set
 * to the empty string first.  Returns KERN_SUCCESS if the complete
 * path was stored, KERN_FAILURE otherwise (no media, no BSD name, or a
 * buffer too small for the path).
 */
kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
{
    io_object_t     nextMedia;
    kern_return_t   kernResult = KERN_FAILURE;
    /* length of the fixed prefix: _PATH_DEV followed by "r" */
    size_t          devPathLength = strlen( _PATH_DEV ) + 1;

    *bsdPath = '\0';
    nextMedia = IOIteratorNext( mediaIterator );
    if ( nextMedia )
    {
        CFTypeRef   bsdPathAsCFString;
        bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
        if ( bsdPathAsCFString ) {
            /*
             * Only build the path when the prefix and its terminator
             * fit; the copies below are otherwise unbounded.
             */
            if ( (CFIndex) devPathLength < maxPathSize ) {
                strcpy( bsdPath, _PATH_DEV );
                strcat( bsdPath, "r" );
                if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
                    kernResult = KERN_SUCCESS;
                }
            }
            CFRelease( bsdPathAsCFString );
        }
        IOObjectRelease( nextMedia );
    }

    return kernResult;
}
927

    
928
#endif
929

    
930
/*
 * Open a host device (disk, CD-ROM, floppy, SCSI generic) as a raw image.
 *
 * On Mac OS X (CONFIG_COCOA) a filename starting with "/dev/cdrom" is
 * replaced by the raw BSD device of the first ejectable CD found.
 * On Linux the device kind is derived from the filename prefix:
 * "/dev/cd" and "/dev/fd" are opened non-blocking so that open() works
 * without media, and "/dev/sg" sets bs->sg.  Floppies are closed again
 * right away; fd_open() reopens them on demand.
 *
 * Returns 0 on success or a negative errno value; EROFS is reported
 * as -EACCES.
 */
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRawState *s = bs->opaque;
    int fd, open_flags, ret;
#ifdef CONFIG_COCOA
    /*
     * Function scope on purpose: filename may be pointed at this buffer
     * and is used by the open() call further down, so the buffer must
     * outlive the "/dev/cdrom" block below.
     */
    char bsdPath[ MAXPATHLEN ];
#endif

#ifdef CONFIG_COCOA
    if (strstart(filename, "/dev/cdrom", NULL)) {
        kern_return_t kernResult;
        io_iterator_t mediaIterator;
        int probe_fd;

        kernResult = FindEjectableCDMedia( &mediaIterator );
        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );

        if ( bsdPath[ 0 ] != '\0' ) {
            strcat(bsdPath,"s0");
            /* some CDs don't have a partition 0 */
            probe_fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
            if (probe_fd < 0) {
                /* fall back to partition 1 */
                bsdPath[strlen(bsdPath)-1] = '1';
            } else {
                close(probe_fd);
            }
            filename = bsdPath;
        }

        if ( mediaIterator )
            IOObjectRelease( mediaIterator );
    }
#endif
    open_flags = O_BINARY;
    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
        open_flags |= O_RDWR;
    } else {
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
#ifdef O_DIRECT
    if (flags & BDRV_O_DIRECT)
        open_flags |= O_DIRECT;
#endif

    s->type = FTYPE_FILE;
#if defined(__linux__)
    if (strstart(filename, "/dev/cd", NULL)) {
        /* open will not fail even if no CD is inserted */
        open_flags |= O_NONBLOCK;
        s->type = FTYPE_CD;
    } else if (strstart(filename, "/dev/fd", NULL)) {
        s->type = FTYPE_FD;
        /* saved without O_NONBLOCK for the later reopen in fd_open() */
        s->fd_open_flags = open_flags;
        /* open will not fail even if no floppy is inserted */
        open_flags |= O_NONBLOCK;
    } else if (strstart(filename, "/dev/sg", NULL)) {
        bs->sg = 1;
    }
#endif
    fd = open(filename, open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        if (ret == -EROFS)
            ret = -EACCES;
        return ret;
    }
    s->fd = fd;
#if defined(__linux__)
    /* close fd so that we can reopen it as needed */
    if (s->type == FTYPE_FD) {
        close(s->fd);
        s->fd = -1;
        s->fd_media_changed = 1;
    }
#endif
    return 0;
}
1006

    
1007
#if defined(__linux__) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
1008

    
1009
/* Note: we do not have a reliable method to detect if the floppy is
1010
   present. The current method is to try to open the floppy at every
1011
   I/O and to keep it opened during a few hundreds of ms. */
1012
static int fd_open(BlockDriverState *bs)
1013
{
1014
    BDRVRawState *s = bs->opaque;
1015
    int last_media_present;
1016

    
1017
    if (s->type != FTYPE_FD)
1018
        return 0;
1019
    last_media_present = (s->fd >= 0);
1020
    if (s->fd >= 0 &&
1021
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1022
        close(s->fd);
1023
        s->fd = -1;
1024
#ifdef DEBUG_FLOPPY
1025
        printf("Floppy closed\n");
1026
#endif
1027
    }
1028
    if (s->fd < 0) {
1029
        if (s->fd_got_error &&
1030
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1031
#ifdef DEBUG_FLOPPY
1032
            printf("No floppy (open delayed)\n");
1033
#endif
1034
            return -EIO;
1035
        }
1036
        s->fd = open(bs->filename, s->fd_open_flags);
1037
        if (s->fd < 0) {
1038
            s->fd_error_time = qemu_get_clock(rt_clock);
1039
            s->fd_got_error = 1;
1040
            if (last_media_present)
1041
                s->fd_media_changed = 1;
1042
#ifdef DEBUG_FLOPPY
1043
            printf("No floppy\n");
1044
#endif
1045
            return -EIO;
1046
        }
1047
#ifdef DEBUG_FLOPPY
1048
        printf("Floppy opened\n");
1049
#endif
1050
    }
1051
    if (!last_media_present)
1052
        s->fd_media_changed = 1;
1053
    s->fd_open_time = qemu_get_clock(rt_clock);
1054
    s->fd_got_error = 0;
1055
    return 0;
1056
}
1057
#else
1058
/*
 * Fallback used when the floppy reopen logic is compiled out (non-Linux
 * hosts, or QEMU_IMG / QEMU_NBD builds): nothing to do, always succeeds.
 */
static int fd_open(BlockDriverState *bs)
{
    (void)bs;
    return 0;
}
1062
#endif
1063

    
1064
#if defined(__linux__)
1065

    
1066
/*
 * Report whether a medium is present in the host device behind 'bs'.
 *
 * CD-ROM: asks the drive via CDROM_DRIVE_STATUS and reports 1 only for
 *         CDS_DISC_OK.
 * Floppy: reports whether fd_open() succeeds.
 * Other : always reports 1.
 */
static int raw_is_inserted(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;

    if (s->type == FTYPE_CD) {
        return ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT) == CDS_DISC_OK;
    }
    if (s->type == FTYPE_FD) {
        return fd_open(bs) >= 0;
    }
    return 1;
}
1086

    
1087
/* currently only used by fdc.c, but a CD version would be good too */
1088
static int raw_media_changed(BlockDriverState *bs)
1089
{
1090
    BDRVRawState *s = bs->opaque;
1091

    
1092
    switch(s->type) {
1093
    case FTYPE_FD:
1094
        {
1095
            int ret;
1096
            /* XXX: we do not have a true media changed indication. It
1097
               does not work if the floppy is changed without trying
1098
               to read it */
1099
            fd_open(bs);
1100
            ret = s->fd_media_changed;
1101
            s->fd_media_changed = 0;
1102
#ifdef DEBUG_FLOPPY
1103
            printf("Floppy changed=%d\n", ret);
1104
#endif
1105
            return ret;
1106
        }
1107
    default:
1108
        return -ENOTSUP;
1109
    }
1110
}
1111

    
1112
/*
 * Eject the medium of the host device behind 'bs', or close its tray.
 *
 * CD-ROM: issues CDROMEJECT when 'eject_flag' is non-zero, CDROMCLOSETRAY
 *         otherwise.
 * Floppy: closes any cached descriptor, then issues FDEJECT on a freshly
 *         opened non-blocking descriptor ('eject_flag' is not consulted).
 *
 * ioctl failures are reported with perror() but do not change the return
 * value.  Returns 0 for CD-ROM and floppy devices, -ENOTSUP otherwise.
 */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    BDRVRawState *s = bs->opaque;

    switch(s->type) {
    case FTYPE_CD:
        if (eject_flag) {
            if (ioctl(s->fd, CDROMEJECT, NULL) < 0) {
                perror("CDROMEJECT");
            }
        } else {
            if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0) {
                /* name the ioctl that actually failed */
                perror("CDROMCLOSETRAY");
            }
        }
        break;
    case FTYPE_FD:
        {
            int fd;

            /* drop the cached descriptor before talking to the drive */
            if (s->fd >= 0) {
                close(s->fd);
                s->fd = -1;
            }
            /* O_NONBLOCK so that open works even with no floppy inserted */
            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
            if (fd >= 0) {
                if (ioctl(fd, FDEJECT, 0) < 0) {
                    perror("FDEJECT");
                }
                close(fd);
            }
        }
        break;
    default:
        return -ENOTSUP;
    }
    return 0;
}
1146

    
1147
/*
 * Lock or unlock the door of a host CD-ROM drive via CDROM_LOCKDOOR.
 *
 * Returns 0 for CD-ROM devices (whether or not the ioctl succeeded) and
 * -ENOTSUP for every other device type.
 */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    BDRVRawState *s = bs->opaque;

    if (s->type != FTYPE_CD) {
        return -ENOTSUP;
    }

    /* Note: an error can happen if the distribution automatically
       mounts the CD-ROM, so a failure here is deliberately ignored. */
    (void)ioctl(s->fd, CDROM_LOCKDOOR, locked);
    return 0;
}
1164

    
1165
/*
 * Forward an ioctl request to the host device descriptor of 'bs'.
 *
 * The value returned by ioctl() is handed back unchanged; it is not
 * converted to a negative errno code.
 */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *s = bs->opaque;
    int rc;

    rc = ioctl(s->fd, req, buf);
    return rc;
}
1171
#else
1172

    
1173
/* Non-Linux fallback: no media detection, always report a medium present. */
static int raw_is_inserted(BlockDriverState *bs)
{
    (void)bs;
    return 1;
}
1177

    
1178
/* Non-Linux fallback: media change detection is not supported. */
static int raw_media_changed(BlockDriverState *bs)
{
    (void)bs;
    return -ENOTSUP;
}
1182

    
1183
/* Non-Linux fallback: ejecting host media is not supported. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    (void)bs;
    (void)eject_flag;
    return -ENOTSUP;
}
1187

    
1188
/* Non-Linux fallback: locking the drive door is not supported. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    (void)bs;
    (void)locked;
    return -ENOTSUP;
}
1192

    
1193
/* Non-Linux fallback: device ioctl pass-through is not supported. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    (void)bs;
    (void)req;
    (void)buf;
    return -ENOTSUP;
}
1197
#endif /* !linux */
1198

    
1199
BlockDriver bdrv_host_device = {
1200
    "host_device",
1201
    sizeof(BDRVRawState),
1202
    NULL, /* no probe for protocols */
1203
    hdev_open,
1204
    NULL,
1205
    NULL,
1206
    raw_close,
1207
    NULL,
1208
    raw_flush,
1209

    
1210
#ifdef CONFIG_AIO
1211
    .bdrv_aio_read = raw_aio_read,
1212
    .bdrv_aio_write = raw_aio_write,
1213
    .bdrv_aio_cancel = raw_aio_cancel,
1214
    .aiocb_size = sizeof(RawAIOCB),
1215
#endif
1216
    .bdrv_pread = raw_pread,
1217
    .bdrv_pwrite = raw_pwrite,
1218
    .bdrv_getlength = raw_getlength,
1219

    
1220
    /* removable device support */
1221
    .bdrv_is_inserted = raw_is_inserted,
1222
    .bdrv_media_changed = raw_media_changed,
1223
    .bdrv_eject = raw_eject,
1224
    .bdrv_set_locked = raw_set_locked,
1225
    /* generic scsi device */
1226
    .bdrv_ioctl = raw_ioctl,
1227
};