root / hw / virtio-blk.c @ 99b3718e
History | View | Annotate | Download (8.7 kB)
1 | 6e02c38d | aliguori | /*
|
---|---|---|---|
2 | 6e02c38d | aliguori | * Virtio Block Device
|
3 | 6e02c38d | aliguori | *
|
4 | 6e02c38d | aliguori | * Copyright IBM, Corp. 2007
|
5 | 6e02c38d | aliguori | *
|
6 | 6e02c38d | aliguori | * Authors:
|
7 | 6e02c38d | aliguori | * Anthony Liguori <aliguori@us.ibm.com>
|
8 | 6e02c38d | aliguori | *
|
9 | 6e02c38d | aliguori | * This work is licensed under the terms of the GNU GPL, version 2. See
|
10 | 6e02c38d | aliguori | * the COPYING file in the top-level directory.
|
11 | 6e02c38d | aliguori | *
|
12 | 6e02c38d | aliguori | */
|
13 | 6e02c38d | aliguori | |
14 | 869a5c6d | aliguori | #include <qemu-common.h> |
15 | 869a5c6d | aliguori | #include <sysemu.h> |
16 | 6e02c38d | aliguori | #include "virtio-blk.h" |
17 | 6e02c38d | aliguori | #include "block_int.h" |
18 | 6e02c38d | aliguori | |
19 | 6e02c38d | aliguori | typedef struct VirtIOBlock |
20 | 6e02c38d | aliguori | { |
21 | 6e02c38d | aliguori | VirtIODevice vdev; |
22 | 6e02c38d | aliguori | BlockDriverState *bs; |
23 | 6e02c38d | aliguori | VirtQueue *vq; |
24 | 869a5c6d | aliguori | void *rq;
|
25 | 6e02c38d | aliguori | } VirtIOBlock; |
26 | 6e02c38d | aliguori | |
27 | 6e02c38d | aliguori | static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
|
28 | 6e02c38d | aliguori | { |
29 | 6e02c38d | aliguori | return (VirtIOBlock *)vdev;
|
30 | 6e02c38d | aliguori | } |
31 | 6e02c38d | aliguori | |
32 | 6e02c38d | aliguori | typedef struct VirtIOBlockReq |
33 | 6e02c38d | aliguori | { |
34 | 6e02c38d | aliguori | VirtIOBlock *dev; |
35 | 6e02c38d | aliguori | VirtQueueElement elem; |
36 | 6e02c38d | aliguori | struct virtio_blk_inhdr *in;
|
37 | 6e02c38d | aliguori | struct virtio_blk_outhdr *out;
|
38 | 6e02c38d | aliguori | size_t size; |
39 | 6e02c38d | aliguori | uint8_t *buffer; |
40 | 869a5c6d | aliguori | struct VirtIOBlockReq *next;
|
41 | 6e02c38d | aliguori | } VirtIOBlockReq; |
42 | 6e02c38d | aliguori | |
43 | 869a5c6d | aliguori | static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) |
44 | 869a5c6d | aliguori | { |
45 | 869a5c6d | aliguori | VirtIOBlock *s = req->dev; |
46 | 869a5c6d | aliguori | |
47 | 869a5c6d | aliguori | req->in->status = status; |
48 | 869a5c6d | aliguori | virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in));
|
49 | 869a5c6d | aliguori | virtio_notify(&s->vdev, s->vq); |
50 | 869a5c6d | aliguori | |
51 | 869a5c6d | aliguori | qemu_free(req->buffer); |
52 | 869a5c6d | aliguori | qemu_free(req); |
53 | 869a5c6d | aliguori | } |
54 | 869a5c6d | aliguori | |
55 | 869a5c6d | aliguori | static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error) |
56 | 869a5c6d | aliguori | { |
57 | 869a5c6d | aliguori | BlockInterfaceErrorAction action = drive_get_onerror(req->dev->bs); |
58 | 869a5c6d | aliguori | VirtIOBlock *s = req->dev; |
59 | 869a5c6d | aliguori | |
60 | 869a5c6d | aliguori | if (action == BLOCK_ERR_IGNORE)
|
61 | 869a5c6d | aliguori | return 0; |
62 | 869a5c6d | aliguori | |
63 | 869a5c6d | aliguori | if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
|
64 | 869a5c6d | aliguori | || action == BLOCK_ERR_STOP_ANY) { |
65 | 869a5c6d | aliguori | req->next = s->rq; |
66 | 869a5c6d | aliguori | s->rq = req; |
67 | 869a5c6d | aliguori | vm_stop(0);
|
68 | 869a5c6d | aliguori | } else {
|
69 | 869a5c6d | aliguori | virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); |
70 | 869a5c6d | aliguori | } |
71 | 869a5c6d | aliguori | |
72 | 869a5c6d | aliguori | return 1; |
73 | 869a5c6d | aliguori | } |
74 | 869a5c6d | aliguori | |
75 | 6e02c38d | aliguori | static void virtio_blk_rw_complete(void *opaque, int ret) |
76 | 6e02c38d | aliguori | { |
77 | 6e02c38d | aliguori | VirtIOBlockReq *req = opaque; |
78 | 6e02c38d | aliguori | |
79 | 6e02c38d | aliguori | /* Copy read data to the guest */
|
80 | 6e02c38d | aliguori | if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) {
|
81 | 6e02c38d | aliguori | size_t offset = 0;
|
82 | 6e02c38d | aliguori | int i;
|
83 | 6e02c38d | aliguori | |
84 | 6e02c38d | aliguori | for (i = 0; i < req->elem.in_num - 1; i++) { |
85 | 6e02c38d | aliguori | size_t len; |
86 | 6e02c38d | aliguori | |
87 | 6e02c38d | aliguori | /* Be pretty defensive wrt malicious guests */
|
88 | 6e02c38d | aliguori | len = MIN(req->elem.in_sg[i].iov_len, |
89 | 6e02c38d | aliguori | req->size - offset); |
90 | 6e02c38d | aliguori | |
91 | 6e02c38d | aliguori | memcpy(req->elem.in_sg[i].iov_base, |
92 | 6e02c38d | aliguori | req->buffer + offset, |
93 | 6e02c38d | aliguori | len); |
94 | 6e02c38d | aliguori | offset += len; |
95 | 6e02c38d | aliguori | } |
96 | 869a5c6d | aliguori | } else if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) { |
97 | 869a5c6d | aliguori | if (virtio_blk_handle_write_error(req, -ret))
|
98 | 869a5c6d | aliguori | return;
|
99 | 6e02c38d | aliguori | } |
100 | 6e02c38d | aliguori | |
101 | 869a5c6d | aliguori | virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); |
102 | 869a5c6d | aliguori | } |
103 | 6e02c38d | aliguori | |
104 | 869a5c6d | aliguori | static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
|
105 | 869a5c6d | aliguori | { |
106 | 869a5c6d | aliguori | VirtIOBlockReq *req = qemu_mallocz(sizeof(*req));
|
107 | 869a5c6d | aliguori | if (req != NULL) |
108 | 869a5c6d | aliguori | req->dev = s; |
109 | 869a5c6d | aliguori | return req;
|
110 | 6e02c38d | aliguori | } |
111 | 6e02c38d | aliguori | |
112 | 6e02c38d | aliguori | static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
|
113 | 6e02c38d | aliguori | { |
114 | 869a5c6d | aliguori | VirtIOBlockReq *req = virtio_blk_alloc_request(s); |
115 | 6e02c38d | aliguori | |
116 | 869a5c6d | aliguori | if (req != NULL) { |
117 | 869a5c6d | aliguori | if (!virtqueue_pop(s->vq, &req->elem)) {
|
118 | 869a5c6d | aliguori | qemu_free(req); |
119 | 869a5c6d | aliguori | return NULL; |
120 | 869a5c6d | aliguori | } |
121 | 6e02c38d | aliguori | } |
122 | 6e02c38d | aliguori | |
123 | 6e02c38d | aliguori | return req;
|
124 | 6e02c38d | aliguori | } |
125 | 6e02c38d | aliguori | |
126 | 869a5c6d | aliguori | static int virtio_blk_handle_write(VirtIOBlockReq *req) |
127 | 869a5c6d | aliguori | { |
128 | 869a5c6d | aliguori | if (!req->buffer) {
|
129 | 869a5c6d | aliguori | size_t offset = 0;
|
130 | 869a5c6d | aliguori | int i;
|
131 | 869a5c6d | aliguori | |
132 | 869a5c6d | aliguori | for (i = 1; i < req->elem.out_num; i++) |
133 | 869a5c6d | aliguori | req->size += req->elem.out_sg[i].iov_len; |
134 | 869a5c6d | aliguori | |
135 | 869a5c6d | aliguori | req->buffer = qemu_memalign(512, req->size);
|
136 | 869a5c6d | aliguori | if (req->buffer == NULL) { |
137 | 869a5c6d | aliguori | qemu_free(req); |
138 | 869a5c6d | aliguori | return -1; |
139 | 869a5c6d | aliguori | } |
140 | 869a5c6d | aliguori | |
141 | 869a5c6d | aliguori | /* We copy the data from the SG list to avoid splitting up the request.
|
142 | 869a5c6d | aliguori | This helps performance a lot until we can pass full sg lists as AIO
|
143 | 869a5c6d | aliguori | operations */
|
144 | 869a5c6d | aliguori | for (i = 1; i < req->elem.out_num; i++) { |
145 | 869a5c6d | aliguori | size_t len; |
146 | 869a5c6d | aliguori | |
147 | 869a5c6d | aliguori | len = MIN(req->elem.out_sg[i].iov_len, |
148 | 869a5c6d | aliguori | req->size - offset); |
149 | 869a5c6d | aliguori | memcpy(req->buffer + offset, |
150 | 869a5c6d | aliguori | req->elem.out_sg[i].iov_base, |
151 | 869a5c6d | aliguori | len); |
152 | 869a5c6d | aliguori | offset += len; |
153 | 869a5c6d | aliguori | } |
154 | 869a5c6d | aliguori | } |
155 | 869a5c6d | aliguori | |
156 | 869a5c6d | aliguori | bdrv_aio_write(req->dev->bs, req->out->sector, req->buffer, req->size / 512,
|
157 | 869a5c6d | aliguori | virtio_blk_rw_complete, req); |
158 | 869a5c6d | aliguori | return 0; |
159 | 869a5c6d | aliguori | } |
160 | 869a5c6d | aliguori | |
161 | 6e02c38d | aliguori | static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) |
162 | 6e02c38d | aliguori | { |
163 | 6e02c38d | aliguori | VirtIOBlock *s = to_virtio_blk(vdev); |
164 | 6e02c38d | aliguori | VirtIOBlockReq *req; |
165 | 6e02c38d | aliguori | |
166 | 6e02c38d | aliguori | while ((req = virtio_blk_get_request(s))) {
|
167 | 6e02c38d | aliguori | int i;
|
168 | 6e02c38d | aliguori | |
169 | 6e02c38d | aliguori | if (req->elem.out_num < 1 || req->elem.in_num < 1) { |
170 | 6e02c38d | aliguori | fprintf(stderr, "virtio-blk missing headers\n");
|
171 | 6e02c38d | aliguori | exit(1);
|
172 | 6e02c38d | aliguori | } |
173 | 6e02c38d | aliguori | |
174 | 6e02c38d | aliguori | if (req->elem.out_sg[0].iov_len < sizeof(*req->out) || |
175 | 6e02c38d | aliguori | req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) { |
176 | 6e02c38d | aliguori | fprintf(stderr, "virtio-blk header not in correct element\n");
|
177 | 6e02c38d | aliguori | exit(1);
|
178 | 6e02c38d | aliguori | } |
179 | 6e02c38d | aliguori | |
180 | 6e02c38d | aliguori | req->out = (void *)req->elem.out_sg[0].iov_base; |
181 | 6e02c38d | aliguori | req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base; |
182 | 6e02c38d | aliguori | |
183 | 6e02c38d | aliguori | if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
|
184 | 6e02c38d | aliguori | unsigned int len = sizeof(*req->in); |
185 | 6e02c38d | aliguori | |
186 | 6e02c38d | aliguori | req->in->status = VIRTIO_BLK_S_UNSUPP; |
187 | 6e02c38d | aliguori | virtqueue_push(vq, &req->elem, len); |
188 | 6e02c38d | aliguori | virtio_notify(vdev, vq); |
189 | 6e02c38d | aliguori | qemu_free(req); |
190 | 6e02c38d | aliguori | } else if (req->out->type & VIRTIO_BLK_T_OUT) { |
191 | 869a5c6d | aliguori | if (virtio_blk_handle_write(req) < 0) |
192 | 6e02c38d | aliguori | break;
|
193 | 6e02c38d | aliguori | } else {
|
194 | 6e02c38d | aliguori | for (i = 0; i < req->elem.in_num - 1; i++) |
195 | 6e02c38d | aliguori | req->size += req->elem.in_sg[i].iov_len; |
196 | 6e02c38d | aliguori | |
197 | 6e02c38d | aliguori | req->buffer = qemu_memalign(512, req->size);
|
198 | 6e02c38d | aliguori | if (req->buffer == NULL) { |
199 | 6e02c38d | aliguori | qemu_free(req); |
200 | 6e02c38d | aliguori | break;
|
201 | 6e02c38d | aliguori | } |
202 | 6e02c38d | aliguori | |
203 | 6e02c38d | aliguori | bdrv_aio_read(s->bs, req->out->sector, |
204 | 6e02c38d | aliguori | req->buffer, |
205 | 6e02c38d | aliguori | req->size / 512,
|
206 | 6e02c38d | aliguori | virtio_blk_rw_complete, |
207 | 6e02c38d | aliguori | req); |
208 | 6e02c38d | aliguori | } |
209 | 6e02c38d | aliguori | } |
210 | 6e02c38d | aliguori | /*
|
211 | 6e02c38d | aliguori | * FIXME: Want to check for completions before returning to guest mode,
|
212 | 6e02c38d | aliguori | * so cached reads and writes are reported as quickly as possible. But
|
213 | 6e02c38d | aliguori | * that should be done in the generic block layer.
|
214 | 6e02c38d | aliguori | */
|
215 | 6e02c38d | aliguori | } |
216 | 6e02c38d | aliguori | |
217 | 869a5c6d | aliguori | static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) |
218 | 869a5c6d | aliguori | { |
219 | 869a5c6d | aliguori | VirtIOBlock *s = opaque; |
220 | 869a5c6d | aliguori | VirtIOBlockReq *req = s->rq; |
221 | 869a5c6d | aliguori | |
222 | 869a5c6d | aliguori | if (!running)
|
223 | 869a5c6d | aliguori | return;
|
224 | 869a5c6d | aliguori | |
225 | 869a5c6d | aliguori | s->rq = NULL;
|
226 | 869a5c6d | aliguori | |
227 | 869a5c6d | aliguori | while (req) {
|
228 | 869a5c6d | aliguori | virtio_blk_handle_write(req); |
229 | 869a5c6d | aliguori | req = req->next; |
230 | 869a5c6d | aliguori | } |
231 | 869a5c6d | aliguori | } |
232 | 869a5c6d | aliguori | |
233 | 6e02c38d | aliguori | static void virtio_blk_reset(VirtIODevice *vdev) |
234 | 6e02c38d | aliguori | { |
235 | 6e02c38d | aliguori | /*
|
236 | 6e02c38d | aliguori | * This should cancel pending requests, but can't do nicely until there
|
237 | 6e02c38d | aliguori | * are per-device request lists.
|
238 | 6e02c38d | aliguori | */
|
239 | 6e02c38d | aliguori | qemu_aio_flush(); |
240 | 6e02c38d | aliguori | } |
241 | 6e02c38d | aliguori | |
242 | 6e02c38d | aliguori | static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) |
243 | 6e02c38d | aliguori | { |
244 | 6e02c38d | aliguori | VirtIOBlock *s = to_virtio_blk(vdev); |
245 | 6e02c38d | aliguori | struct virtio_blk_config blkcfg;
|
246 | 6e02c38d | aliguori | uint64_t capacity; |
247 | 6e02c38d | aliguori | int cylinders, heads, secs;
|
248 | 6e02c38d | aliguori | |
249 | 6e02c38d | aliguori | bdrv_get_geometry(s->bs, &capacity); |
250 | 6e02c38d | aliguori | bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs); |
251 | 6e02c38d | aliguori | stq_raw(&blkcfg.capacity, capacity); |
252 | 6e02c38d | aliguori | stl_raw(&blkcfg.seg_max, 128 - 2); |
253 | 6e02c38d | aliguori | stw_raw(&blkcfg.cylinders, cylinders); |
254 | 6e02c38d | aliguori | blkcfg.heads = heads; |
255 | 6e02c38d | aliguori | blkcfg.sectors = secs; |
256 | 6e02c38d | aliguori | memcpy(config, &blkcfg, sizeof(blkcfg));
|
257 | 6e02c38d | aliguori | } |
258 | 6e02c38d | aliguori | |
259 | 6e02c38d | aliguori | static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
|
260 | 6e02c38d | aliguori | { |
261 | 6e02c38d | aliguori | return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY); |
262 | 6e02c38d | aliguori | } |
263 | 6e02c38d | aliguori | |
264 | 6e02c38d | aliguori | static void virtio_blk_save(QEMUFile *f, void *opaque) |
265 | 6e02c38d | aliguori | { |
266 | 6e02c38d | aliguori | VirtIOBlock *s = opaque; |
267 | 869a5c6d | aliguori | VirtIOBlockReq *req = s->rq; |
268 | 869a5c6d | aliguori | |
269 | 6e02c38d | aliguori | virtio_save(&s->vdev, f); |
270 | 869a5c6d | aliguori | |
271 | 869a5c6d | aliguori | while (req) {
|
272 | 869a5c6d | aliguori | qemu_put_sbyte(f, 1);
|
273 | 869a5c6d | aliguori | qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); |
274 | 869a5c6d | aliguori | req = req->next; |
275 | 869a5c6d | aliguori | } |
276 | 869a5c6d | aliguori | qemu_put_sbyte(f, 0);
|
277 | 6e02c38d | aliguori | } |
278 | 6e02c38d | aliguori | |
279 | 6e02c38d | aliguori | static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) |
280 | 6e02c38d | aliguori | { |
281 | 6e02c38d | aliguori | VirtIOBlock *s = opaque; |
282 | 6e02c38d | aliguori | |
283 | 869a5c6d | aliguori | if (version_id != 2) |
284 | 6e02c38d | aliguori | return -EINVAL;
|
285 | 6e02c38d | aliguori | |
286 | 6e02c38d | aliguori | virtio_load(&s->vdev, f); |
287 | 869a5c6d | aliguori | while (qemu_get_sbyte(f)) {
|
288 | 869a5c6d | aliguori | VirtIOBlockReq *req = virtio_blk_alloc_request(s); |
289 | 869a5c6d | aliguori | qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); |
290 | 869a5c6d | aliguori | req->next = s->rq; |
291 | 869a5c6d | aliguori | s->rq = req->next; |
292 | 869a5c6d | aliguori | } |
293 | 6e02c38d | aliguori | |
294 | 6e02c38d | aliguori | return 0; |
295 | 6e02c38d | aliguori | } |
296 | 6e02c38d | aliguori | |
297 | 9b32d5a5 | aliguori | void *virtio_blk_init(PCIBus *bus, BlockDriverState *bs)
|
298 | 6e02c38d | aliguori | { |
299 | 6e02c38d | aliguori | VirtIOBlock *s; |
300 | 6e02c38d | aliguori | int cylinders, heads, secs;
|
301 | 6e02c38d | aliguori | static int virtio_blk_id; |
302 | 6e02c38d | aliguori | |
303 | 9b32d5a5 | aliguori | s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk",
|
304 | 9b32d5a5 | aliguori | PCI_VENDOR_ID_REDHAT_QUMRANET, |
305 | 9b32d5a5 | aliguori | PCI_DEVICE_ID_VIRTIO_BLOCK, |
306 | 99b3718e | aliguori | PCI_VENDOR_ID_REDHAT_QUMRANET, |
307 | 99b3718e | aliguori | VIRTIO_ID_BLOCK, |
308 | 6e02c38d | aliguori | 0x01, 0x80, 0x00, |
309 | 6e02c38d | aliguori | sizeof(struct virtio_blk_config), sizeof(VirtIOBlock)); |
310 | 6e02c38d | aliguori | if (!s)
|
311 | 6e02c38d | aliguori | return NULL; |
312 | 6e02c38d | aliguori | |
313 | 6e02c38d | aliguori | s->vdev.get_config = virtio_blk_update_config; |
314 | 6e02c38d | aliguori | s->vdev.get_features = virtio_blk_get_features; |
315 | 6e02c38d | aliguori | s->vdev.reset = virtio_blk_reset; |
316 | 6e02c38d | aliguori | s->bs = bs; |
317 | 869a5c6d | aliguori | s->rq = NULL;
|
318 | 6e02c38d | aliguori | bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs); |
319 | 6e02c38d | aliguori | bdrv_set_geometry_hint(s->bs, cylinders, heads, secs); |
320 | 6e02c38d | aliguori | |
321 | 6e02c38d | aliguori | s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
|
322 | 6e02c38d | aliguori | |
323 | 869a5c6d | aliguori | qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); |
324 | 869a5c6d | aliguori | register_savevm("virtio-blk", virtio_blk_id++, 2, |
325 | 6e02c38d | aliguori | virtio_blk_save, virtio_blk_load, s); |
326 | 6e02c38d | aliguori | |
327 | 6e02c38d | aliguori | return s;
|
328 | 6e02c38d | aliguori | } |