Revision f141eafe
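This revision converts the block layer's asynchronous I/O path from linear buffers to I/O vectors: the per-driver `bdrv_aio_read`/`bdrv_aio_write` callbacks become `bdrv_aio_readv`/`bdrv_aio_writev` taking a `QEMUIOVector`, the generic `VectorTranslationAIOCB` bounce layer in block.c is deleted, and scatter/gather handling is pushed down into the individual drivers and into the POSIX AIO emulation (posix-aio-compat.c), which now carries an `iovec` list per request and linearizes only when it has to.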

--- a/block-qcow.c
+++ b/block-qcow.c
@@ -525,7 +525,9 @@
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
@@ -543,12 +545,8 @@
     int index_in_cluster;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
  redo:
     /* post process the read buffer */
@@ -570,9 +568,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -592,7 +589,7 @@
             acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                 &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
             if (acb->hd_aiocb == NULL)
-                goto fail;
+                goto done;
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
@@ -601,14 +598,14 @@
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         goto redo;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
         acb->hd_iov.iov_base = acb->buf;
         acb->hd_iov.iov_len = acb->n * 512;
@@ -617,12 +614,22 @@
                             (acb->cluster_offset >> 9) + index_in_cluster,
                             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
+    }
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
@@ -632,7 +639,11 @@
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1)
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+    else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -652,12 +663,8 @@
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     acb->nb_sectors -= acb->n;
     acb->sector_num += acb->n;
@@ -665,9 +672,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -679,14 +685,14 @@
                                         index_in_cluster + acb->n);
     if (!cluster_offset || (cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
             acb->cluster_data = qemu_mallocz(s->cluster_size);
             if (!acb->cluster_data) {
                 ret = -ENOMEM;
-                goto fail;
+                goto done;
             }
         }
         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
@@ -704,11 +710,18 @@
                                     &acb->hd_qiov, acb->n,
                                     qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -721,7 +734,12 @@
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = (uint8_t *)buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        qemu_iovec_to_buffer(qiov, acb->buf);
+    } else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
 
@@ -909,8 +927,8 @@
     .bdrv_is_allocated	= qcow_is_allocated,
     .bdrv_set_key	= qcow_set_key,
     .bdrv_make_empty	= qcow_make_empty,
-    .bdrv_aio_read	= qcow_aio_read,
-    .bdrv_aio_write	= qcow_aio_write,
+    .bdrv_aio_readv	= qcow_aio_readv,
+    .bdrv_aio_writev	= qcow_aio_writev,
     .bdrv_aio_cancel	= qcow_aio_cancel,
     .aiocb_size		= sizeof(QCowAIOCB),
    .bdrv_write_compressed = qcow_write_compressed,
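The qcow driver above (and qcow2 below) handles vectored requests by falling back to a single, 512-byte-aligned bounce buffer whenever the vector has more than one element: writes gather the segments into the buffer up front (`qemu_iovec_to_buffer`), while reads scatter the buffer back into the segments in the `done:` path (`qemu_iovec_from_buffer`). A minimal standalone sketch of that setup step; the names `setup_buf` and `gather` are hypothetical stand-ins for the driver code in the hunks above:

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

/* Gather all segments into one linear buffer, as qemu_iovec_to_buffer()
 * does on the write path. */
static void gather(uint8_t *dst, const struct iovec *iov, int niov)
{
    int i;

    for (i = 0; i < niov; i++) {
        memcpy(dst, iov[i].iov_base, iov[i].iov_len);
        dst += iov[i].iov_len;
    }
}

/* Pick the I/O buffer for a request: a multi-element vector gets an
 * aligned bounce buffer (freed on completion), a single-element vector
 * is used in place. Mirrors the qcow_aio_readv()/qcow_aio_writev()
 * setup in the patch. */
static uint8_t *setup_buf(struct iovec *iov, int niov, size_t size,
                          int is_write, uint8_t **orig_buf)
{
    uint8_t *buf;

    if (niov > 1) {
        if (posix_memalign((void **)&buf, 512, size))
            return NULL;
        *orig_buf = buf;
        if (is_write)
            gather(buf, iov, niov);
        /* reads copy the bounce buffer back into the vector on
         * completion, then free it */
    } else {
        buf = iov->iov_base;
        *orig_buf = NULL;
    }
    return buf;
}
```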
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1264,7 +1264,9 @@
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
@@ -1307,12 +1309,8 @@
     int index_in_cluster, n1;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -1333,9 +1331,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -1356,32 +1353,32 @@
                                     &acb->hd_qiov, acb->n,
 				    qcow_aio_read_cb, acb);
                 if (acb->hd_aiocb == NULL)
-                    goto fail;
+                    goto done;
             } else {
                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                 if (ret < 0)
-                    goto fail;
+                    goto done;
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
-                goto fail;
+                goto done;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
-            goto fail;
+            goto done;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
 
         acb->hd_iov.iov_base = acb->buf;
@@ -1391,13 +1388,22 @@
                             (acb->cluster_offset >> 9) + index_in_cluster,
                             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
+    }
+
+    return;
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
     }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
     QCowAIOCB *acb;
 
@@ -1406,7 +1412,13 @@
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        if (is_write)
+            qemu_iovec_to_buffer(qiov, acb->buf);
+    } else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -1414,13 +1426,13 @@
     return acb;
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
 
-    acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
@@ -1439,16 +1451,12 @@
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
         free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
-        goto fail;
+        goto done;
     }
 
     acb->nb_sectors -= acb->n;
@@ -1457,9 +1465,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1473,7 +1480,7 @@
                                           n_end, &acb->n, &acb->l2meta);
     if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
@@ -1494,11 +1501,19 @@
                                     &acb->hd_qiov, acb->n,
                                     qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1506,7 +1521,7 @@
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
     if (!acb)
         return NULL;
 
@@ -2771,8 +2786,8 @@
     .bdrv_set_key	= qcow_set_key,
     .bdrv_make_empty	= qcow_make_empty,
 
-    .bdrv_aio_read	= qcow_aio_read,
-    .bdrv_aio_write	= qcow_aio_write,
+    .bdrv_aio_readv	= qcow_aio_readv,
+    .bdrv_aio_writev	= qcow_aio_writev,
     .bdrv_aio_cancel	= qcow_aio_cancel,
     .aiocb_size		= sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -599,8 +599,8 @@
     return 0;
 }
 
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
+        QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
@@ -614,24 +614,25 @@
         return NULL;
     acb->aiocb.aio_fildes = s->fd;
     acb->aiocb.ev_signo = SIGUSR2;
-    acb->aiocb.aio_buf = buf;
-    if (nb_sectors < 0)
-        acb->aiocb.aio_nbytes = -nb_sectors;
-    else
-        acb->aiocb.aio_nbytes = nb_sectors * 512;
+    acb->aiocb.aio_iov = qiov->iov;
+    acb->aiocb.aio_niov = qiov->niov;
+    acb->aiocb.aio_nbytes = nb_sectors * 512;
     acb->aiocb.aio_offset = sector_num * 512;
+    acb->aiocb.aio_flags = 0;
+
+    /*
+     * If O_DIRECT is used the buffer needs to be aligned on a sector
+     * boundary. Tell the low level code to ensure that in case it's
+     * not done yet.
+     */
+    if (s->aligned_buf)
+        acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+
     acb->next = posix_aio_state->first_aio;
     posix_aio_state->first_aio = acb;
     return acb;
 }
 
-static void raw_aio_em_cb(void* opaque)
-{
-    RawAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_release(acb);
-}
-
 static void raw_aio_remove(RawAIOCB *acb)
 {
     RawAIOCB **pacb;
@@ -651,28 +652,13 @@
     }
 }
 
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
 
-    /*
-     * If O_DIRECT is used and the buffer is not aligned fall back
-     * to synchronous IO.
-     */
-    BDRVRawState *s = bs->opaque;
-
-    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
-        QEMUBH *bh;
-        acb = qemu_aio_get(bs, cb, opaque);
-        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
-        bh = qemu_bh_new(raw_aio_em_cb, acb);
-        qemu_bh_schedule(bh);
-        return &acb->common;
-    }
-
-    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
     if (qemu_paio_read(&acb->aiocb) < 0) {
@@ -682,28 +668,13 @@
     return &acb->common;
 }
 
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
 
-    /*
-     * If O_DIRECT is used and the buffer is not aligned fall back
-     * to synchronous IO.
-     */
-    BDRVRawState *s = bs->opaque;
-
-    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
-        QEMUBH *bh;
-        acb = qemu_aio_get(bs, cb, opaque);
-        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
-        bh = qemu_bh_new(raw_aio_em_cb, acb);
-        qemu_bh_schedule(bh);
-        return &acb->common;
-    }
-
-    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
     if (qemu_paio_write(&acb->aiocb) < 0) {
@@ -887,8 +858,8 @@
     .bdrv_flush = raw_flush,
 
 #ifdef CONFIG_AIO
-    .bdrv_aio_read = raw_aio_read,
-    .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_readv = raw_aio_readv,
+    .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_cancel = raw_aio_cancel,
     .aiocb_size = sizeof(RawAIOCB),
 #endif
@@ -1215,12 +1186,24 @@
         unsigned long int req, void *buf,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
+    BDRVRawState *s = bs->opaque;
     RawAIOCB *acb;
 
-    acb = raw_aio_setup(bs, 0, buf, 0, cb, opaque);
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    acb = qemu_aio_get(bs, cb, opaque);
     if (!acb)
         return NULL;
+    acb->aiocb.aio_fildes = s->fd;
+    acb->aiocb.ev_signo = SIGUSR2;
+    acb->aiocb.aio_offset = 0;
+    acb->aiocb.aio_flags = 0;
+
+    acb->next = posix_aio_state->first_aio;
+    posix_aio_state->first_aio = acb;
 
+    acb->aiocb.aio_ioctl_buf = buf;
     acb->aiocb.aio_ioctl_cmd = req;
     if (qemu_paio_ioctl(&acb->aiocb) < 0) {
         raw_aio_remove(acb);
@@ -1424,8 +1407,8 @@
     .bdrv_flush		= raw_flush,
 
 #ifdef CONFIG_AIO
-    .bdrv_aio_read	= raw_aio_read,
-    .bdrv_aio_write	= raw_aio_write,
+    .bdrv_aio_readv	= raw_aio_readv,
+    .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_cancel	= raw_aio_cancel,
     .aiocb_size		= sizeof(RawAIOCB),
 #endif
--- a/block.c
+++ b/block.c
@@ -47,25 +47,21 @@
 #define SECTOR_BITS 9
 #define SECTOR_SIZE (1 << SECTOR_BITS)
 
-static AIOPool vectored_aio_pool;
-
 typedef struct BlockDriverAIOCBSync {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
+    /* vector translation state */
+    QEMUIOVector *qiov;
+    uint8_t *bounce;
+    int is_write;
 } BlockDriverAIOCBSync;
 
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
@@ -144,10 +140,10 @@
 
 static void bdrv_register(BlockDriver *bdrv)
 {
-    if (!bdrv->bdrv_aio_read) {
+    if (!bdrv->bdrv_aio_readv) {
         /* add AIO emulation layer */
-        bdrv->bdrv_aio_read = bdrv_aio_read_em;
-        bdrv->bdrv_aio_write = bdrv_aio_write_em;
+        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
     } else if (!bdrv->bdrv_read) {
@@ -1295,91 +1291,10 @@
 /**************************************************************/
 /* async I/Os */
 
-typedef struct VectorTranslationAIOCB {
-    BlockDriverAIOCB common;
-    QEMUIOVector *iov;
-    uint8_t *bounce;
-    int is_write;
-    BlockDriverAIOCB *aiocb;
-} VectorTranslationAIOCB;
-
-static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
-{
-    VectorTranslationAIOCB *acb
-        = container_of(_acb, VectorTranslationAIOCB, common);
-
-    bdrv_aio_cancel(acb->aiocb);
-}
-
-static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
-{
-    VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
-
-    if (!s->is_write) {
-        qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
-    }
-    qemu_vfree(s->bounce);
-    s->common.cb(s->common.opaque, ret);
-    qemu_aio_release(s);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *iov,
-                                            int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque,
-                                            int is_write)
-
-{
-    VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
-                                                  cb, opaque);
-
-    s->iov = iov;
-    s->bounce = qemu_memalign(512, nb_sectors * 512);
-    s->is_write = is_write;
-    if (is_write) {
-        qemu_iovec_to_buffer(s->iov, s->bounce);
-        s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
-                                  bdrv_aio_rw_vector_cb, s);
-    } else {
-        s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
-                                 bdrv_aio_rw_vector_cb, s);
-    }
-    if (!s->aiocb) {
-        qemu_vfree(s->bounce);
-        qemu_aio_release(s);
-        return NULL;
-    }
-    return &s->common;
-}
-
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                                 QEMUIOVector *iov, int nb_sectors,
+                                 QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 0);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                                  QEMUIOVector *iov, int nb_sectors,
-                                  BlockDriverCompletionFunc *cb, void *opaque)
-{
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 1);
-}
-
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
-                                uint8_t *buf, int nb_sectors,
-                                BlockDriverCompletionFunc *cb, void *opaque)
-{
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
 
@@ -1388,7 +1303,8 @@
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+                              cb, opaque);
 
     if (ret) {
 	/* Update stats even though technically transfer has not happened. */
@@ -1399,9 +1315,9 @@
     return ret;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors,
-                                 BlockDriverCompletionFunc *cb, void *opaque)
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  QEMUIOVector *qiov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
@@ -1413,7 +1329,8 @@
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+                               cb, opaque);
 
     if (ret) {
 	/* Update stats even though technically transfer has not happened. */
@@ -1436,42 +1353,62 @@
 static void bdrv_aio_bh_cb(void *opaque)
 {
     BlockDriverAIOCBSync *acb = opaque;
+
+    qemu_vfree(acb->bounce);
+
+    if (!acb->is_write)
+        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
     acb->common.cb(acb->common.opaque, acb->ret);
+
     qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+
 {
     BlockDriverAIOCBSync *acb;
-    int ret;
 
     acb = qemu_aio_get(bs, cb, opaque);
+    acb->is_write = is_write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_memalign(512, qiov->size);
+
     if (!acb->bh)
         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
+
+    if (is_write) {
+        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+    } else {
+        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+    }
+
     qemu_bh_schedule(acb->bh);
+
     return &acb->common;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
-    BlockDriverAIOCBSync *acb;
-    int ret;
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
 
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb->bh)
-        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_write(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
-    qemu_bh_schedule(acb->bh);
-    return &acb->common;
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }
 
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
@@ -1494,10 +1431,15 @@
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
-                        bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
+        bdrv_rw_em_cb, &async_ret);
     if (acb == NULL)
         return -1;
 
@@ -1513,10 +1455,15 @@
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
-                         bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = (void *)buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
+        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL)
         return -1;
     while (async_ret == NOT_DONE) {
@@ -1527,9 +1474,6 @@
 
 void bdrv_init(void)
 {
-    aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
-                  bdrv_aio_cancel_vector);
-
     bdrv_register(&bdrv_raw);
     bdrv_register(&bdrv_host_device);
 #ifndef _WIN32
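With the translation layer gone, a caller that still has one linear buffer simply wraps it in a single-element `QEMUIOVector`, which the drivers can then use in place without bouncing. That is exactly what the synchronous emulation above now does; the pattern, as a fragment lifted from the `bdrv_read_em` hunk (not a standalone program):

```c
/* Wrap a plain linear buffer in a one-element vector and issue the
 * vectored request; mirrors bdrv_read_em() in this patch. */
struct iovec iov;
QEMUIOVector qiov;

iov.iov_base = buf;                       /* caller's linear buffer */
iov.iov_len = nb_sectors * 512;
qemu_iovec_init_external(&qiov, &iov, 1); /* no allocation, no copy */
acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
                     bdrv_rw_em_cb, &async_ret);
```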
--- a/block_int.h
+++ b/block_int.h
@@ -54,11 +54,11 @@
     int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
     int (*bdrv_make_empty)(BlockDriverState *bs);
     /* aio */
-    BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+    BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
-    BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+    BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
     void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
     int aiocb_size;
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "osdep.h"
+#include "qemu-common.h"
 
 #include "posix-aio-compat.h"
 
@@ -76,45 +77,110 @@
     if (ret) die2(ret, "pthread_create");
 }
 
-static size_t handle_aiocb_readwrite(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+{
+	int ret;
+
+	ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+	if (ret == -1)
+		return -errno;
+	return ret;
+}
+
+/*
+ * Check if we need to copy the data in the aiocb into a new
+ * properly aligned buffer.
+ */
+static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
+{
+    if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; i++)
+            if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
+                return 1;
+    }
+
+    return 0;
+}
+
+static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
 {
     size_t offset = 0;
-    ssize_t len;
+    size_t len;
 
     while (offset < aiocb->aio_nbytes) {
-        if (aiocb->aio_type == QEMU_PAIO_WRITE)
-            len = pwrite(aiocb->aio_fildes,
-                         (const char *)aiocb->aio_buf + offset,
+         if (aiocb->aio_type == QEMU_PAIO_WRITE)
+             len = pwrite(aiocb->aio_fildes,
+                          (const char *)buf + offset,
+                          aiocb->aio_nbytes - offset,
+                          aiocb->aio_offset + offset);
+         else
+             len = pread(aiocb->aio_fildes,
+                         buf + offset,
                          aiocb->aio_nbytes - offset,
                          aiocb->aio_offset + offset);
-        else
-            len = pread(aiocb->aio_fildes,
-                        (char *)aiocb->aio_buf + offset,
-                        aiocb->aio_nbytes - offset,
-                        aiocb->aio_offset + offset);
-
-        if (len == -1 && errno == EINTR)
-            continue;
-        else if (len == -1) {
-            offset = -errno;
-            break;
-        } else if (len == 0)
-            break;
 
-        offset += len;
+         if (len == -1 && errno == EINTR)
+             continue;
+         else if (len == -1) {
+             offset = -errno;
+             break;
+         } else if (len == 0)
+             break;
+
+         offset += len;
     }
 
     return offset;
 }
 
-static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
 {
-	int ret;
+    size_t nbytes;
+    char *buf;
+
+    if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
+        /*
+         * If there is just a single buffer, and it is properly aligned
+         * we can just use plain pread/pwrite without any problems.
+         */
+        return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+    }
 
-	ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_buf);
-	if (ret == -1)
-		return -errno;
-	return ret;
+    /*
+     * Ok, we have to do it the hard way, copy all segments into
+     * a single aligned buffer.
+     */
+    buf = qemu_memalign(512, aiocb->aio_nbytes);
+    if (aiocb->aio_type == QEMU_PAIO_WRITE) {
+        char *p = buf;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; ++i) {
+            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+            p += aiocb->aio_iov[i].iov_len;
+        }
+    }
+
+    nbytes = handle_aiocb_rw_linear(aiocb, buf);
+    if (aiocb->aio_type != QEMU_PAIO_WRITE) {
+        char *p = buf;
+        size_t count = aiocb->aio_nbytes, copy;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov && count; ++i) {
+            copy = count;
+            if (copy > aiocb->aio_iov[i].iov_len)
+                copy = aiocb->aio_iov[i].iov_len;
+            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            p     += copy;
+            count -= copy;
+        }
+    }
+    qemu_vfree(buf);
+
+    return nbytes;
 }
 
 static void *aio_thread(void *unused)
@@ -157,7 +223,7 @@
         switch (aiocb->aio_type) {
         case QEMU_PAIO_READ:
         case QEMU_PAIO_WRITE:
-		ret = handle_aiocb_readwrite(aiocb);
+		ret = handle_aiocb_rw(aiocb);
 		break;
         case QEMU_PAIO_IOCTL:
 		ret = handle_aiocb_ioctl(aiocb);
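`handle_aiocb_rw()` linearizes only when forced to: an aligned single-segment request goes straight to `pread`/`pwrite` via `handle_aiocb_rw_linear()`, everything else is bounced through one aligned buffer. On the read path the bounce buffer is then copied back segment by segment, each copy clamped to the segment length while a running byte count is decremented. A standalone sketch of that scatter step; the name `scatter_back` is hypothetical:

```c
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

/* Copy 'count' bytes from a linear bounce buffer back into the
 * request's segments, clamping each copy to the segment size; this
 * models the loop handle_aiocb_rw() runs after
 * handle_aiocb_rw_linear() returns on the read path. */
static void scatter_back(const char *buf, size_t count,
                         const struct iovec *iov, int niov)
{
    int i;
    size_t copy;

    for (i = 0; i < niov && count; i++) {
        copy = count < iov[i].iov_len ? count : iov[i].iov_len;
        memcpy(iov[i].iov_base, buf, copy);
        buf += copy;
        count -= copy;
    }
}
```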
--- a/posix-aio-compat.h
+++ b/posix-aio-compat.h
@@ -27,11 +27,18 @@
 struct qemu_paiocb
 {
     int aio_fildes;
-    void *aio_buf;
+    union {
+        struct iovec *aio_iov;
+	void *aio_ioctl_buf;
+    };
+    int aio_niov;
     size_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_PAIO_IOCTL */
     int ev_signo;
     off_t aio_offset;
+    unsigned aio_flags;
+/* 512 byte alignment required for buffer, offset and length */
+#define QEMU_AIO_SECTOR_ALIGNED	0x01
 
     /* private */
     TAILQ_ENTRY(qemu_paiocb) node;
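Since a request is either plain I/O or an ioctl, never both, the segment list and the ioctl argument can share storage: `aio_ioctl_buf` overlays `aio_iov` in the new anonymous union, just as `aio_ioctl_cmd` already aliases `aio_nbytes` via the `#define` above. The `QEMU_AIO_SECTOR_ALIGNED` bit in `aio_flags` is how block-raw-posix.c tells the worker threads (see `aiocb_needs_copy()`) that O_DIRECT is in effect and misaligned segments must be bounced.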
