Revision eabba580 block/qed.c
--- a/block/qed.c
+++ b/block/qed.c
@@ -12,8 +12,26 @@
  *
  */

+#include "trace.h"
 #include "qed.h"

+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    bool finished = false;
+
+    /* Wait for the request to finish */
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static AIOPool qed_aio_pool = {
+    .aiocb_size         = sizeof(QEDAIOCB),
+    .cancel             = qed_aio_cancel,
+};
+
 static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
                           const char *filename)
 {
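The cancel hook above blocks the caller by pumping the event loop until the in-flight request flips a stack-allocated flag. A minimal standalone sketch of that handshake, with stand-in types and a fake event loop replacing qemu_aio_wait():

#include <stdbool.h>
#include <stdio.h>

/* Stand-in types; a fake event loop replaces qemu_aio_wait(). */
typedef struct {
    bool *finished;              /* set on completion, observed by cancel */
} Request;

static Request inflight;
static int pending_events = 3;   /* pretend three loop iterations remain */

static void fake_aio_wait(void)
{
    if (--pending_events == 0) {
        *inflight.finished = true;   /* completion fires, signals cancel */
    }
}

int main(void)
{
    bool finished = false;

    inflight.finished = &finished;   /* same handshake as qed_aio_cancel() */
    while (!finished) {
        fake_aio_wait();
    }
    printf("request finished; safe to return from cancel\n");
    return 0;
}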
@@ -155,6 +173,24 @@
     return 0;
 }

+/**
+ * Allocate new clusters
+ *
+ * @s:          QED state
+ * @n:          Number of contiguous clusters to allocate
+ * @ret:        Offset of first allocated cluster
+ *
+ * This function only produces the offset where the new clusters should be
+ * written.  It updates BDRVQEDState but does not make any changes to the
+ * image file.
+ */
+static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
+{
+    uint64_t offset = s->file_size;
+    s->file_size += n * s->header.cluster_size;
+    return offset;
+}
+
 QEDTable *qed_alloc_table(BDRVQEDState *s)
 {
     /* Honor O_DIRECT memory alignment requirements */
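qed_alloc_clusters() is a pure bump allocator over the in-memory end-of-file marker; the image file itself is untouched until data lands at the returned offset. A standalone model with illustrative sizes:

#include <stdint.h>
#include <stdio.h>

/* Minimal model of qed_alloc_clusters(): allocation only advances the
 * in-memory file size; values are illustrative, not from a real image. */
typedef struct {
    uint64_t file_size;
    uint64_t cluster_size;
} State;

static uint64_t alloc_clusters(State *s, unsigned int n)
{
    uint64_t offset = s->file_size;
    s->file_size += n * s->cluster_size;
    return offset;
}

int main(void)
{
    State s = { .file_size = 1 << 20, .cluster_size = 64 * 1024 };

    /* first: 0x100000, second: 0x120000 (right after the two clusters) */
    printf("first:  %#llx\n", (unsigned long long)alloc_clusters(&s, 2));
    printf("second: %#llx\n", (unsigned long long)alloc_clusters(&s, 1));
    return 0;
}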
@@ -162,6 +198,23 @@
                            s->header.cluster_size * s->header.table_size);
 }

+/**
+ * Allocate a new zeroed L2 table
+ */
+static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
+{
+    CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
+
+    l2_table->table = qed_alloc_table(s);
+    l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
+
+    memset(l2_table->table->offsets, 0,
+           s->header.cluster_size * s->header.table_size);
+    return l2_table;
+}
+
+static void qed_aio_next_io(void *opaque, int ret);
+
 static int bdrv_qed_open(BlockDriverState *bs, int flags)
 {
     BDRVQEDState *s = bs->opaque;
@@ -170,6 +223,7 @@
     int ret;

     s->bs = bs;
+    QSIMPLEQ_INIT(&s->allocating_write_reqs);

     ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
@@ -431,13 +485,583 @@
     return -ENOTSUP;
 }

+static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
+{
+    return acb->common.bs->opaque;
+}
+
+/**
+ * Read from the backing file or zero-fill if no backing file
+ *
+ * @s:          QED state
+ * @pos:        Byte position in device
+ * @qiov:       Destination I/O vector
+ * @cb:         Completion function
+ * @opaque:     User data for completion function
+ *
+ * This function reads qiov->size bytes starting at pos from the backing file.
+ * If there is no backing file then zeroes are read.
+ */
+static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
+                                  QEMUIOVector *qiov,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCB *aiocb;
+    uint64_t backing_length = 0;
+    size_t size;
+
+    /* If there is a backing file, get its length.  Treat the absence of a
+     * backing file like a zero length backing file.
+     */
+    if (s->bs->backing_hd) {
+        int64_t l = bdrv_getlength(s->bs->backing_hd);
+        if (l < 0) {
+            cb(opaque, l);
+            return;
+        }
+        backing_length = l;
+    }
+
+    /* Zero all sectors if reading beyond the end of the backing file */
+    if (pos >= backing_length ||
+        pos + qiov->size > backing_length) {
+        qemu_iovec_memset(qiov, 0, qiov->size);
+    }
+
+    /* Complete now if there are no backing file sectors to read */
+    if (pos >= backing_length) {
+        cb(opaque, 0);
+        return;
+    }
+
+    /* If the read straddles the end of the backing file, shorten it */
+    size = MIN((uint64_t)backing_length - pos, qiov->size);
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING);
+    aiocb = bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+                           qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+    if (!aiocb) {
+        cb(opaque, -EIO);
+    }
+}
+
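The three cases in qed_read_backing_file() reduce to simple interval arithmetic against the backing file length. A standalone model (sizes are made up):

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Reads past EOF of the backing file are zero-filled, straddling reads are
 * shortened, and a missing backing file behaves like a zero-length one. */
static void plan_backing_read(uint64_t pos, uint64_t len, uint64_t backing_len)
{
    int zero_fill = pos >= backing_len || pos + len > backing_len;
    uint64_t read_len = pos >= backing_len ? 0 : MIN(backing_len - pos, len);

    printf("pos=%llu len=%llu -> zero_fill=%d read_len=%llu\n",
           (unsigned long long)pos, (unsigned long long)len,
           zero_fill, (unsigned long long)read_len);
}

int main(void)
{
    plan_backing_read(0, 4096, 8192);     /* fully inside: read 4096 */
    plan_backing_read(6144, 4096, 8192);  /* straddles EOF: zero, read 2048 */
    plan_backing_read(8192, 4096, 8192);  /* past EOF: zero-fill only */
    return 0;
}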
+typedef struct {
+    GenericCB gencb;
+    BDRVQEDState *s;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    uint64_t offset;
+} CopyFromBackingFileCB;
+
+static void qed_copy_from_backing_file_cb(void *opaque, int ret)
+{
+    CopyFromBackingFileCB *copy_cb = opaque;
+    qemu_vfree(copy_cb->iov.iov_base);
+    gencb_complete(&copy_cb->gencb, ret);
+}
+
+static void qed_copy_from_backing_file_write(void *opaque, int ret)
+{
+    CopyFromBackingFileCB *copy_cb = opaque;
+    BDRVQEDState *s = copy_cb->s;
+    BlockDriverAIOCB *aiocb;
+
+    if (ret) {
+        qed_copy_from_backing_file_cb(copy_cb, ret);
+        return;
+    }
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+    aiocb = bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+                            &copy_cb->qiov,
+                            copy_cb->qiov.size / BDRV_SECTOR_SIZE,
+                            qed_copy_from_backing_file_cb, copy_cb);
+    if (!aiocb) {
+        qed_copy_from_backing_file_cb(copy_cb, -EIO);
+    }
+}
+
+/**
+ * Copy data from backing file into the image
+ *
+ * @s:          QED state
+ * @pos:        Byte position in device
+ * @len:        Number of bytes
+ * @offset:     Byte offset in image file
+ * @cb:         Completion function
+ * @opaque:     User data for completion function
+ */
+static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
+                                       uint64_t len, uint64_t offset,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
+{
+    CopyFromBackingFileCB *copy_cb;
+
+    /* Skip copy entirely if there is no work to do */
+    if (len == 0) {
+        cb(opaque, 0);
+        return;
+    }
+
+    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
+    copy_cb->s = s;
+    copy_cb->offset = offset;
+    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
+    copy_cb->iov.iov_len = len;
+    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
+
+    qed_read_backing_file(s, pos, &copy_cb->qiov,
+                          qed_copy_from_backing_file_write, copy_cb);
+}
+
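The copy path is continuation-passing style: the backing-file read completes into qed_copy_from_backing_file_write(), whose write completes into qed_copy_from_backing_file_cb(). A standalone model of that chain, with synchronous fakes standing in for the async I/O:

#include <stdio.h>

typedef void CompletionFunc(void *opaque, int ret);

/* Fakes invoke their callback immediately; the real calls are async. */
static void fake_read(CompletionFunc *cb, void *opaque)  { cb(opaque, 0); }
static void fake_write(CompletionFunc *cb, void *opaque) { cb(opaque, 0); }

static void copy_done(void *opaque, int ret)
{
    printf("copy complete, ret=%d (bounce buffer would be freed here)\n", ret);
}

static void copy_write(void *opaque, int ret)
{
    if (ret) {                       /* propagate read errors, skip the write */
        copy_done(opaque, ret);
        return;
    }
    fake_write(copy_done, opaque);
}

int main(void)
{
    fake_read(copy_write, NULL);     /* kicks off read -> write -> done */
    return 0;
}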
+/**
+ * Link one or more contiguous clusters into a table
+ *
+ * @s:          QED state
+ * @table:      L2 table
+ * @index:      First cluster index
+ * @n:          Number of contiguous clusters
+ * @cluster:    First cluster byte offset in image file
+ */
+static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
+                                unsigned int n, uint64_t cluster)
+{
+    int i;
+    for (i = index; i < index + n; i++) {
+        table->offsets[i] = cluster;
+        cluster += s->header.cluster_size;
+    }
+}
+
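qed_update_l2_table() links n contiguous data clusters into consecutive table slots. A standalone model with an illustrative 64 KiB cluster size and a tiny table:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t offsets[8] = {0};         /* tiny stand-in for an L2 table */
    uint64_t cluster_size = 64 * 1024;
    uint64_t cluster = 0x120000;       /* first newly allocated cluster */
    int index = 2, n = 3, i;

    /* same loop shape as qed_update_l2_table() */
    for (i = index; i < index + n; i++) {
        offsets[i] = cluster;
        cluster += cluster_size;
    }
    for (i = 0; i < 8; i++) {
        printf("offsets[%d] = %#llx\n", i, (unsigned long long)offsets[i]);
    }
    return 0;
}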
+static void qed_aio_complete_bh(void *opaque)
+{
+    QEDAIOCB *acb = opaque;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *user_opaque = acb->common.opaque;
+    int ret = acb->bh_ret;
+    bool *finished = acb->finished;
+
+    qemu_bh_delete(acb->bh);
+    qemu_aio_release(acb);
+
+    /* Invoke callback */
+    cb(user_opaque, ret);
+
+    /* Signal cancel completion */
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qed_aio_complete(QEDAIOCB *acb, int ret)
+{
+    BDRVQEDState *s = acb_to_s(acb);
+
+    trace_qed_aio_complete(s, acb, ret);
+
+    /* Free resources */
+    qemu_iovec_destroy(&acb->cur_qiov);
+    qed_unref_l2_cache_entry(acb->request.l2_table);
+
+    /* Arrange for a bh to invoke the completion function */
+    acb->bh_ret = ret;
+    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+    qemu_bh_schedule(acb->bh);
+
+    /* Start next allocating write request waiting behind this one.  Note that
+     * requests enqueue themselves when they first hit an unallocated cluster
+     * but they wait until the entire request is finished before waking up the
+     * next request in the queue.  This ensures that we don't cycle through
+     * requests multiple times but rather finish one at a time completely.
+     */
+    if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
+        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+        if (acb) {
+            qed_aio_next_io(acb, 0);
+        }
+    }
+}
+
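The comment in qed_aio_complete() describes strict FIFO serialization of allocating writes. A standalone model of that policy, with a fixed-size array standing in for the QSIMPLEQ:

#include <stdio.h>

#define MAX 4
static int queue[MAX], head, tail;

/* A request enqueues itself; only the head of the queue makes progress. */
static void start(int id)
{
    queue[tail++] = id;
    if (queue[head] == id) {
        printf("request %d runs immediately\n", id);
    } else {
        printf("request %d waits\n", id);
    }
}

/* Completion removes the head and wakes the next waiter exactly once. */
static void complete(void)
{
    printf("request %d completes\n", queue[head++]);
    if (head < tail) {
        printf("request %d woken\n", queue[head]);
    }
}

int main(void)
{
    start(1); start(2); start(3);
    complete(); complete(); complete();
    return 0;
}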
+/**
+ * Commit the current L2 table to the cache
+ */
+static void qed_commit_l2_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    CachedL2Table *l2_table = acb->request.l2_table;
+
+    qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+    /* This is guaranteed to succeed because we just committed the entry to
+     * the cache.
+     */
+    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache,
+                                                    l2_table->offset);
+    assert(acb->request.l2_table != NULL);
+
+    qed_aio_next_io(opaque, ret);
+}
+
+/**
+ * Update L1 table with new L2 table offset and write it out
+ */
+static void qed_aio_write_l1_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    int index;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    index = qed_l1_index(s, acb->cur_pos);
+    s->l1_table->offsets[index] = acb->request.l2_table->offset;
+
+    qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
+}
+
+/**
+ * Update L2 table with new cluster offsets and write them out
+ */
+static void qed_aio_write_l2_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
+    int index;
+
+    if (ret) {
+        goto err;
+    }
+
+    if (need_alloc) {
+        qed_unref_l2_cache_entry(acb->request.l2_table);
+        acb->request.l2_table = qed_new_l2_table(s);
+    }
+
+    index = qed_l2_index(s, acb->cur_pos);
+    qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
+                        acb->cur_cluster);
+
+    if (need_alloc) {
+        /* Write out the whole new L2 table */
+        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
+                           qed_aio_write_l1_update, acb);
+    } else {
+        /* Write out only the updated part of the L2 table */
+        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
+                           qed_aio_next_io, acb);
+    }
+    return;
+
+err:
+    qed_aio_complete(acb, ret);
+}
+
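The need_alloc branch distinguishes an L1 miss (no L2 table exists yet, so a whole zeroed table is written and then linked into L1) from an L2 miss (only the touched entries of an existing table are rewritten). A standalone sketch of that decision, with stand-in enum values:

#include <stdio.h>

enum { CLUSTER_FOUND, CLUSTER_L2, CLUSTER_L1 };   /* stand-ins */

static const char *l2_update_plan(int find_cluster_ret)
{
    if (find_cluster_ret == CLUSTER_L1) {
        return "allocate new L2 table, write all entries, then update L1";
    }
    return "write only the updated entries of the existing L2 table";
}

int main(void)
{
    printf("L1 miss: %s\n", l2_update_plan(CLUSTER_L1));
    printf("L2 miss: %s\n", l2_update_plan(CLUSTER_L2));
    return 0;
}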
+/**
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use.  A crash during an
+ * allocating write could result in empty clusters in the image.  If the write
+ * only touched a subregion of the cluster, then backing image sectors have
+ * been lost in the untouched region.  The solution is to flush after writing
+ * a new data cluster and before updating the L2 table.
+ */
+static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+
+    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
+        qed_aio_complete(acb, -EIO);
+    }
+}
+
+/**
+ * Write data to the image file
+ */
+static void qed_aio_write_main(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    uint64_t offset = acb->cur_cluster +
+                      qed_offset_into_cluster(s, acb->cur_pos);
+    BlockDriverCompletionFunc *next_fn;
+    BlockDriverAIOCB *file_acb;
+
+    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
+        next_fn = qed_aio_next_io;
+    } else {
+        if (s->bs->backing_hd) {
+            next_fn = qed_aio_write_flush_before_l2_update;
+        } else {
+            next_fn = qed_aio_write_l2_update;
+        }
+    }
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+    file_acb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+                               &acb->cur_qiov,
+                               acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+                               next_fn, acb);
+    if (!file_acb) {
+        qed_aio_complete(acb, -EIO);
+    }
+}
+
+/**
+ * Populate back untouched region of new data cluster
+ */
+static void qed_aio_write_postfill(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    uint64_t start = acb->cur_pos + acb->cur_qiov.size;
+    uint64_t len =
+        qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+    uint64_t offset = acb->cur_cluster +
+                      qed_offset_into_cluster(s, acb->cur_pos) +
+                      acb->cur_qiov.size;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    trace_qed_aio_write_postfill(s, acb, start, len, offset);
+    qed_copy_from_backing_file(s, start, len, offset,
+                               qed_aio_write_main, acb);
+}
+
+/**
+ * Populate front untouched region of new data cluster
+ */
+static void qed_aio_write_prefill(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
+    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+
+    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+    qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
+                               qed_aio_write_postfill, acb);
+}
+
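Prefill and postfill copy the untouched head and tail of the newly allocated cluster range from the backing file. A standalone model of the geometry (64 KiB clusters and the write extents are illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t cluster_size = 64 * 1024;
    uint64_t pos = 70 * 1024;     /* guest write offset */
    uint64_t len = 100 * 1024;    /* guest write length */

    /* round down / round up to cluster boundaries */
    uint64_t start_of_cluster = pos & ~(cluster_size - 1);
    uint64_t end = pos + len;
    uint64_t end_of_clusters = (end + cluster_size - 1) & ~(cluster_size - 1);

    /* prefill: 6144 bytes at 65536; postfill: 22528 bytes at 174080 */
    printf("prefill:  copy %llu bytes at %llu\n",
           (unsigned long long)(pos - start_of_cluster),
           (unsigned long long)start_of_cluster);
    printf("postfill: copy %llu bytes at %llu\n",
           (unsigned long long)(end_of_clusters - end),
           (unsigned long long)end);
    return 0;
}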
+/**
+ * Write new data cluster
+ *
+ * @acb:        Write request
+ * @len:        Length in bytes
+ *
+ * This path is taken when writing to previously unallocated clusters.
+ */
+static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+{
+    BDRVQEDState *s = acb_to_s(acb);
+
+    /* Freeze this request if another allocating write is in progress */
+    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+        QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
+    }
+    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+        return; /* wait for existing request to finish */
+    }
+
+    acb->cur_nclusters = qed_bytes_to_clusters(s,
+            qed_offset_into_cluster(s, acb->cur_pos) + len);
+    acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+    /* Write new cluster */
+    qed_aio_write_prefill(acb, 0);
+}
+
+/**
+ * Write data cluster in place
+ *
+ * @acb:        Write request
+ * @offset:     Cluster offset in bytes
+ * @len:        Length in bytes
+ *
+ * This path is taken when writing to already allocated clusters.
+ */
+static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+{
+    /* Calculate the I/O vector */
+    acb->cur_cluster = offset;
+    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+    /* Do the actual write */
+    qed_aio_write_main(acb, 0);
+}
+
+/**
+ * Write data cluster
+ *
+ * @opaque:     Write request
+ * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ *              or -errno
+ * @offset:     Cluster offset in bytes
+ * @len:        Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_write_data(void *opaque, int ret,
+                               uint64_t offset, size_t len)
+{
+    QEDAIOCB *acb = opaque;
+
+    trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
+
+    acb->find_cluster_ret = ret;
+
+    switch (ret) {
+    case QED_CLUSTER_FOUND:
+        qed_aio_write_inplace(acb, offset, len);
+        break;
+
+    case QED_CLUSTER_L2:
+    case QED_CLUSTER_L1:
+        qed_aio_write_alloc(acb, len);
+        break;
+
+    default:
+        qed_aio_complete(acb, ret);
+        break;
+    }
+}
+
+/**
+ * Read data cluster
+ *
+ * @opaque:     Read request
+ * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ *              or -errno
+ * @offset:     Cluster offset in bytes
+ * @len:        Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_read_data(void *opaque, int ret,
+                              uint64_t offset, size_t len)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    BlockDriverState *bs = acb->common.bs;
+    BlockDriverAIOCB *file_acb;
+
+    /* Adjust offset into cluster */
+    offset += qed_offset_into_cluster(s, acb->cur_pos);
+
+    trace_qed_aio_read_data(s, acb, ret, offset, len);
+
+    if (ret < 0) {
+        goto err;
+    }
+
+    qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+    /* Handle backing file and unallocated sparse hole reads */
+    if (ret != QED_CLUSTER_FOUND) {
+        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+                              qed_aio_next_io, acb);
+        return;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+    file_acb = bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+                              &acb->cur_qiov,
+                              acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+                              qed_aio_next_io, acb);
+    if (!file_acb) {
+        ret = -EIO;
+        goto err;
+    }
+    return;
+
+err:
+    qed_aio_complete(acb, ret);
+}
+
|
999 |
/** |
|
1000 |
* Begin next I/O or complete the request |
|
1001 |
*/ |
|
1002 |
static void qed_aio_next_io(void *opaque, int ret) |
|
1003 |
{ |
|
1004 |
QEDAIOCB *acb = opaque; |
|
1005 |
BDRVQEDState *s = acb_to_s(acb); |
|
1006 |
QEDFindClusterFunc *io_fn = |
|
1007 |
acb->is_write ? qed_aio_write_data : qed_aio_read_data; |
|
1008 |
|
|
1009 |
trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); |
|
1010 |
|
|
1011 |
/* Handle I/O error */ |
|
1012 |
if (ret) { |
|
1013 |
qed_aio_complete(acb, ret); |
|
1014 |
return; |
|
1015 |
} |
|
1016 |
|
|
1017 |
acb->qiov_offset += acb->cur_qiov.size; |
|
1018 |
acb->cur_pos += acb->cur_qiov.size; |
|
1019 |
qemu_iovec_reset(&acb->cur_qiov); |
|
1020 |
|
|
1021 |
/* Complete request */ |
|
1022 |
if (acb->cur_pos >= acb->end_pos) { |
|
1023 |
qed_aio_complete(acb, 0); |
|
1024 |
return; |
|
1025 |
} |
|
1026 |
|
|
1027 |
/* Find next cluster and start I/O */ |
|
1028 |
qed_find_cluster(s, &acb->request, |
|
1029 |
acb->cur_pos, acb->end_pos - acb->cur_pos, |
|
1030 |
io_fn, acb); |
|
1031 |
} |
|
1032 |
|
|
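qed_aio_next_io() drives the request as a loop: each completed piece advances cur_pos by the bytes just transferred until end_pos is reached. A standalone model with made-up piece sizes (real ones come from qed_find_cluster()):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t cur_pos = 0, end_pos = 192 * 1024;
    uint64_t piece[] = { 64 * 1024, 64 * 1024, 64 * 1024 };
    int i = 0;

    while (cur_pos < end_pos) {
        cur_pos += piece[i++];   /* "acb->cur_pos += acb->cur_qiov.size" */
        printf("piece done, cur_pos=%llu\n", (unsigned long long)cur_pos);
    }
    printf("request complete\n");
    return 0;
}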
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov, int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque, bool is_write)
+{
+    QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque);
+
+    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
+                        opaque, is_write);
+
+    acb->is_write = is_write;
+    acb->finished = NULL;
+    acb->qiov = qiov;
+    acb->qiov_offset = 0;
+    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+    acb->request.l2_table = NULL;
+    qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+
+    /* Start request */
+    qed_aio_next_io(acb, 0);
+    return &acb->common;
+}
+
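qed_aio_setup() converts the sector-based block API into the byte positions the state machine tracks. A standalone model of that setup arithmetic:

#include <stdint.h>
#include <stdio.h>

#define BDRV_SECTOR_SIZE 512ULL   /* QEMU's fixed 512-byte sector unit */

int main(void)
{
    int64_t sector_num = 100;
    int nb_sectors = 16;

    /* same conversion as qed_aio_setup(): cur_pos=51200, end_pos=59392 */
    uint64_t cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    uint64_t end_pos = cur_pos + nb_sectors * BDRV_SECTOR_SIZE;

    printf("cur_pos=%llu end_pos=%llu\n",
           (unsigned long long)cur_pos, (unsigned long long)end_pos);
    return 0;
}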
 static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
                                             int64_t sector_num,
                                             QEMUIOVector *qiov, int nb_sectors,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
 {
-    return NULL;
+    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, false);
 }

 static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
@@ -446,7 +1070,7 @@
                                              BlockDriverCompletionFunc *cb,
                                              void *opaque)
 {
-    return NULL;
+    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, true);
 }

 static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,