root / hw / virtio-blk.c @ b0a7b120
History | View | Annotate | Download (8.7 kB)
1 |
/*
|
---|---|
2 |
* Virtio Block Device
|
3 |
*
|
4 |
* Copyright IBM, Corp. 2007
|
5 |
*
|
6 |
* Authors:
|
7 |
* Anthony Liguori <aliguori@us.ibm.com>
|
8 |
*
|
9 |
* This work is licensed under the terms of the GNU GPL, version 2. See
|
10 |
* the COPYING file in the top-level directory.
|
11 |
*
|
12 |
*/
|
13 |
|
14 |
#include <qemu-common.h> |
15 |
#include <sysemu.h> |
16 |
#include "virtio-blk.h" |
17 |
#include "block_int.h" |
18 |
|
19 |
typedef struct VirtIOBlock |
20 |
{ |
21 |
VirtIODevice vdev; |
22 |
BlockDriverState *bs; |
23 |
VirtQueue *vq; |
24 |
void *rq;
|
25 |
} VirtIOBlock; |
26 |
|
27 |
/*
 * Convert a generic virtio device pointer into the block device that
 * contains it (vdev is the first member of VirtIOBlock).
 */
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    VirtIOBlock *blk = (VirtIOBlock *)vdev;

    return blk;
}
31 |
|
32 |
typedef struct VirtIOBlockReq |
33 |
{ |
34 |
VirtIOBlock *dev; |
35 |
VirtQueueElement elem; |
36 |
struct virtio_blk_inhdr *in;
|
37 |
struct virtio_blk_outhdr *out;
|
38 |
size_t size; |
39 |
uint8_t *buffer; |
40 |
struct VirtIOBlockReq *next;
|
41 |
} VirtIOBlockReq; |
42 |
|
43 |
/*
 * Finish a request: store the status in the in-header, push the element
 * back onto the device's virtqueue, notify, and free both the data buffer
 * and the request itself.  The request must not be used afterwards.
 */
static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
{
    VirtIOBlock *blk = req->dev;
    size_t pushed_len = req->size + sizeof(*req->in);

    req->in->status = status;

    virtqueue_push(blk->vq, &req->elem, pushed_len);
    virtio_notify(&blk->vdev, blk->vq);

    qemu_free(req->buffer);
    qemu_free(req);
}
54 |
|
55 |
static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error) |
56 |
{ |
57 |
BlockInterfaceErrorAction action = drive_get_onerror(req->dev->bs); |
58 |
VirtIOBlock *s = req->dev; |
59 |
|
60 |
if (action == BLOCK_ERR_IGNORE)
|
61 |
return 0; |
62 |
|
63 |
if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
|
64 |
|| action == BLOCK_ERR_STOP_ANY) { |
65 |
req->next = s->rq; |
66 |
s->rq = req; |
67 |
vm_stop(0);
|
68 |
} else {
|
69 |
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); |
70 |
} |
71 |
|
72 |
return 1; |
73 |
} |
74 |
|
75 |
static void virtio_blk_rw_complete(void *opaque, int ret) |
76 |
{ |
77 |
VirtIOBlockReq *req = opaque; |
78 |
|
79 |
/* Copy read data to the guest */
|
80 |
if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) {
|
81 |
size_t offset = 0;
|
82 |
int i;
|
83 |
|
84 |
for (i = 0; i < req->elem.in_num - 1; i++) { |
85 |
size_t len; |
86 |
|
87 |
/* Be pretty defensive wrt malicious guests */
|
88 |
len = MIN(req->elem.in_sg[i].iov_len, |
89 |
req->size - offset); |
90 |
|
91 |
memcpy(req->elem.in_sg[i].iov_base, |
92 |
req->buffer + offset, |
93 |
len); |
94 |
offset += len; |
95 |
} |
96 |
} else if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) { |
97 |
if (virtio_blk_handle_write_error(req, -ret))
|
98 |
return;
|
99 |
} |
100 |
|
101 |
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); |
102 |
} |
103 |
|
104 |
/*
 * Allocate a zero-filled request bound to device s.
 *
 * Returns the new request, or NULL if the allocation returned NULL.  The
 * result is no longer dereferenced before being checked, so a failed
 * allocation is passed on to the caller instead of faulting here.
 */
static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = qemu_mallocz(sizeof(*req));

    if (req != NULL) {
        req->dev = s;
    }

    return req;
}
110 |
|
111 |
/*
 * Allocate a request and fill its element from the device's virtqueue.
 *
 * Returns the request, or NULL when the allocation failed or
 * virtqueue_pop() returned 0 (in which case the fresh request is freed).
 */
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

    if (req == NULL) {
        return NULL;
    }

    if (virtqueue_pop(s->vq, &req->elem)) {
        return req;
    }

    qemu_free(req);
    return NULL;
}
124 |
|
125 |
/*
 * Submit a write request to the block layer.
 *
 * If the request has no data buffer yet, one is built first: the lengths
 * of all out segments after the first (the header) are summed into
 * req->size, a 512-byte-aligned buffer of that size is allocated, and the
 * segments are copied into it back to back.  A request that already has a
 * buffer is submitted as is.
 *
 * Returns 0 once the asynchronous write has been issued, or -1 if the
 * buffer could not be allocated; in that case req has been freed and must
 * not be touched by the caller.
 */
static int virtio_blk_handle_write(VirtIOBlockReq *req)
{
    if (req->buffer == NULL) {
        size_t copied = 0;
        int seg;

        for (seg = 1; seg < req->elem.out_num; seg++) {
            req->size += req->elem.out_sg[seg].iov_len;
        }

        req->buffer = qemu_memalign(512, req->size);
        if (req->buffer == NULL) {
            qemu_free(req);
            return -1;
        }

        /* We copy the data from the SG list to avoid splitting up the
           request.  This helps performance a lot until we can pass full
           sg lists as AIO operations */
        for (seg = 1; seg < req->elem.out_num; seg++) {
            size_t room = req->size - copied;
            size_t chunk = req->elem.out_sg[seg].iov_len;

            /* Never copy past the end of the buffer. */
            if (chunk > room) {
                chunk = room;
            }

            memcpy(req->buffer + copied,
                   req->elem.out_sg[seg].iov_base,
                   chunk);
            copied += chunk;
        }
    }

    /* The sector count is the byte size divided by 512. */
    bdrv_aio_write(req->dev->bs, req->out->sector, req->buffer,
                   req->size / 512, virtio_blk_rw_complete, req);

    return 0;
}
159 |
|
160 |
/*
 * Virtqueue handler: drain the queue and dispatch every request.
 *
 * For each element the first out segment must be large enough for the
 * request header and the last in segment large enough for the status
 * header; otherwise the process prints a message and exits.
 *
 * Dispatch is by the type bits of the out header, tested in this order:
 *   VIRTIO_BLK_T_SCSI_CMD - completed immediately as VIRTIO_BLK_S_UNSUPP
 *   VIRTIO_BLK_T_OUT      - handed to virtio_blk_handle_write()
 *   otherwise             - treated as a read: a buffer covering every in
 *                           segment except the last is allocated and an
 *                           asynchronous read is issued
 *
 * Processing stops early when a data buffer cannot be allocated.
 */
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    VirtIOBlockReq *req;

    for (req = virtio_blk_get_request(s); req != NULL;
         req = virtio_blk_get_request(s)) {
        int seg;

        if (req->elem.out_num < 1 || req->elem.in_num < 1) {
            fprintf(stderr, "virtio-blk missing headers\n");
            exit(1);
        }

        if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
            req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
            fprintf(stderr, "virtio-blk header not in correct element\n");
            exit(1);
        }

        req->out = (void *)req->elem.out_sg[0].iov_base;
        req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;

        if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
            unsigned int status_len = sizeof(*req->in);

            req->in->status = VIRTIO_BLK_S_UNSUPP;
            virtqueue_push(vq, &req->elem, status_len);
            virtio_notify(vdev, vq);
            qemu_free(req);
            continue;
        }

        if (req->out->type & VIRTIO_BLK_T_OUT) {
            /* On failure the request has already been freed. */
            if (virtio_blk_handle_write(req) < 0) {
                break;
            }
            continue;
        }

        /* Read: size the buffer from every in segment but the last. */
        for (seg = 0; seg < req->elem.in_num - 1; seg++) {
            req->size += req->elem.in_sg[seg].iov_len;
        }

        req->buffer = qemu_memalign(512, req->size);
        if (req->buffer == NULL) {
            qemu_free(req);
            break;
        }

        bdrv_aio_read(s->bs, req->out->sector, req->buffer,
                      req->size / 512, virtio_blk_rw_complete, req);
    }
    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible. But
     * that should be done in the generic block layer.
     */
}
215 |
|
216 |
static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) |
217 |
{ |
218 |
VirtIOBlock *s = opaque; |
219 |
VirtIOBlockReq *req = s->rq; |
220 |
|
221 |
if (!running)
|
222 |
return;
|
223 |
|
224 |
s->rq = NULL;
|
225 |
|
226 |
while (req) {
|
227 |
virtio_blk_handle_write(req); |
228 |
req = req->next; |
229 |
} |
230 |
} |
231 |
|
232 |
/*
 * Device reset hook.
 *
 * Ideally this would cancel the requests still pending for this device,
 * but that cannot be done nicely until there are per-device request
 * lists, so the AIO layer is flushed instead.
 */
static void virtio_blk_reset(VirtIODevice *vdev)
{
    (void)vdev;

    qemu_aio_flush();
}
240 |
|
241 |
/*
 * Fill the device configuration space.
 *
 * config receives sizeof(struct virtio_blk_config) bytes holding the
 * capacity and geometry reported by the block layer plus the maximum
 * segment count.
 *
 * The local structure is zeroed first so that any field or padding not
 * assigned below is copied out as zero rather than as leftover stack
 * contents.
 */
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int cylinders, heads, secs;

    bdrv_get_geometry(s->bs, &capacity);
    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);

    memset(&blkcfg, 0, sizeof(blkcfg));
    stq_raw(&blkcfg.capacity, capacity);
    /* NOTE(review): 128 matches the queue size passed to virtio_add_queue();
       the "- 2" presumably reserves the header and status descriptors -
       confirm. */
    stl_raw(&blkcfg.seg_max, 128 - 2);
    stw_raw(&blkcfg.cylinders, cylinders);
    blkcfg.heads = heads;
    blkcfg.sectors = secs;

    memcpy(config, &blkcfg, sizeof(blkcfg));
}
257 |
|
258 |
/*
 * Report the feature bits offered by the device: VIRTIO_BLK_F_SEG_MAX and
 * VIRTIO_BLK_F_GEOMETRY.
 */
static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
{
    uint32_t features = 0;

    features |= 1 << VIRTIO_BLK_F_SEG_MAX;
    features |= 1 << VIRTIO_BLK_F_GEOMETRY;

    return features;
}
262 |
|
263 |
/*
 * savevm handler.
 *
 * Writes the generic virtio state, then one record per request on the
 * device's rq list: a marker byte of 1 followed by the raw bytes of the
 * request's virtqueue element.  A marker byte of 0 ends the list.
 */
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *pending;

    virtio_save(&s->vdev, f);

    for (pending = s->rq; pending != NULL; pending = pending->next) {
        qemu_put_sbyte(f, 1);
        qemu_put_buffer(f, (unsigned char*)&pending->elem,
                        sizeof(pending->elem));
    }

    qemu_put_sbyte(f, 0);
}
277 |
|
278 |
/*
 * loadvm handler, the counterpart of virtio_blk_save().
 *
 * Only stream version 2 is accepted.  After the generic virtio state,
 * every record introduced by a non-zero marker byte is read into a newly
 * allocated request, which is pushed onto the head of the device's rq
 * list.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, or -ENOMEM if
 * a request could not be allocated.
 */
static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *s = opaque;

    if (version_id != 2)
        return -EINVAL;

    virtio_load(&s->vdev, f);
    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);

        if (req == NULL)
            return -ENOMEM;

        qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));

        /* Link the new request in as the list head.  The previous code
           stored req->next here, which left the list unchanged and leaked
           every loaded request. */
        req->next = s->rq;
        s->rq = req;
    }

    return 0;
}
295 |
|
296 |
/*
 * Create a virtio block device on the given PCI bus, backed by bs.
 *
 * Installs the config, feature and reset callbacks, records the geometry
 * guessed by the block layer as the geometry hint, adds a 128-entry
 * request queue, and registers the VM state change handler and the
 * savevm handlers (stream version 2).  Each device registers under its
 * own instance id, taken from a counter that is incremented per call.
 *
 * Returns the new device, or NULL if virtio_init_pci() returned NULL.
 */
void *virtio_blk_init(PCIBus *bus, BlockDriverState *bs)
{
    static int virtio_blk_id;
    VirtIOBlock *s;
    int cylinders, heads, secs;
    int instance_id;

    s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk",
                                       PCI_VENDOR_ID_REDHAT_QUMRANET,
                                       PCI_DEVICE_ID_VIRTIO_BLOCK,
                                       PCI_VENDOR_ID_REDHAT_QUMRANET,
                                       VIRTIO_ID_BLOCK,
                                       PCI_CLASS_STORAGE_OTHER, 0x00,
                                       sizeof(struct virtio_blk_config),
                                       sizeof(VirtIOBlock));
    if (s == NULL) {
        return NULL;
    }

    /* Device callbacks. */
    s->vdev.get_config = virtio_blk_update_config;
    s->vdev.get_features = virtio_blk_get_features;
    s->vdev.reset = virtio_blk_reset;

    /* Backing drive and empty pending-request list. */
    s->bs = bs;
    s->rq = NULL;
    bs->private = &s->vdev.pci_dev;

    bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
    bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);

    s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);

    qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);

    instance_id = virtio_blk_id++;
    register_savevm("virtio-blk", instance_id, 2,
                    virtio_blk_save, virtio_blk_load, s);

    return s;
}