hw/virtio-blk.c @ 173a543b

/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <qemu-common.h>
#include <sysemu.h>
#include "virtio-blk.h"
#include "block_int.h"

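/*
 * Per-device state.  vdev must remain the first member so that a
 * VirtIODevice pointer can be cast back to the containing VirtIOBlock (see
 * to_virtio_blk() below).  rq is a singly linked list of write requests
 * parked by virtio_blk_handle_write_error() when the VM is stopped on a
 * write error; virtio_blk_dma_restart_cb() replays them on resume.
 */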
typedef struct VirtIOBlock
{
    VirtIODevice vdev;
    BlockDriverState *bs;
    VirtQueue *vq;
    void *rq;
} VirtIOBlock;

static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
    return (VirtIOBlock *)vdev;
}

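/*
 * One in-flight request.  elem is the virtqueue element popped from the
 * guest; out points at the request header in out_sg[0] and in points at the
 * status footer in the last in_sg entry.  buffer is a host-side bounce
 * buffer used for both reads and writes, and next chains requests that are
 * waiting to be restarted after a write error stopped the VM.
 */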
typedef struct VirtIOBlockReq
{
    VirtIOBlock *dev;
    VirtQueueElement elem;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    size_t size;
    uint8_t *buffer;
    struct VirtIOBlockReq *next;
} VirtIOBlockReq;

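/*
 * Complete a request: record the status byte in the guest-visible footer,
 * push the element back onto the virtqueue with a used length covering the
 * payload plus the status byte, notify the guest, and release the bounce
 * buffer and the request itself.
 */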
static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
{
    VirtIOBlock *s = req->dev;

    req->in->status = status;
    virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in));
    virtio_notify(&s->vdev, s->vq);

    qemu_free(req->buffer);
    qemu_free(req);
}

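/*
 * Apply the drive's -onerror policy to a failed write.  Returns 0 when the
 * error is ignored (the caller completes the request normally) and 1 when
 * the request has been handled here: either parked on s->rq with the VM
 * stopped (stop policies), or completed with VIRTIO_BLK_S_IOERR (report).
 */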
static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error)
{
    BlockInterfaceErrorAction action = drive_get_onerror(req->dev->bs);
    VirtIOBlock *s = req->dev;

    if (action == BLOCK_ERR_IGNORE)
        return 0;

    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
            || action == BLOCK_ERR_STOP_ANY) {
        req->next = s->rq;
        s->rq = req;
        vm_stop(0);
    } else {
        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
    }

    return 1;
}

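/*
 * AIO completion callback for both reads and writes.  On a successful read
 * the bounce buffer is copied back into the guest's in_sg segments (all but
 * the last one, which holds the status byte), clamping each copy to the data
 * actually requested.  A failed write is routed through the -onerror policy;
 * read errors are not inspected here and fall through to a normal completion.
 */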
static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;

    /* Copy read data to the guest */
    if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) {
        size_t offset = 0;
        int i;

        for (i = 0; i < req->elem.in_num - 1; i++) {
            size_t len;

            /* Be pretty defensive wrt malicious guests */
            len = MIN(req->elem.in_sg[i].iov_len,
                      req->size - offset);

            memcpy(req->elem.in_sg[i].iov_base,
                   req->buffer + offset,
                   len);
            offset += len;
        }
    } else if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) {
        if (virtio_blk_handle_write_error(req, -ret))
            return;
    }

    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
}

static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = qemu_mallocz(sizeof(*req));
    if (req != NULL)
        req->dev = s;
    return req;
}

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
    VirtIOBlockReq *req = virtio_blk_alloc_request(s);

    if (req != NULL) {
        if (!virtqueue_pop(s->vq, &req->elem)) {
            qemu_free(req);
            return NULL;
        }
    }

    return req;
}

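/*
 * Issue a guest write.  The data segments (out_sg[1] onwards, after the
 * request header) are coalesced into a single 512-byte-aligned bounce buffer
 * so that one bdrv_aio_write() covers the whole request; the buffer is only
 * built the first time, so a request replayed after a write error reuses it.
 * The extra copy is the price paid until scatter/gather AIO is available.
 */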
static int virtio_blk_handle_write(VirtIOBlockReq *req)
{
    if (!req->buffer) {
        size_t offset = 0;
        int i;

        for (i = 1; i < req->elem.out_num; i++)
            req->size += req->elem.out_sg[i].iov_len;

        req->buffer = qemu_memalign(512, req->size);
        if (req->buffer == NULL) {
            qemu_free(req);
            return -1;
        }

        /* We copy the data from the SG list to avoid splitting up the request.
           This helps performance a lot until we can pass full sg lists as AIO
           operations */
        for (i = 1; i < req->elem.out_num; i++) {
            size_t len;

            len = MIN(req->elem.out_sg[i].iov_len,
                      req->size - offset);
            memcpy(req->buffer + offset,
                   req->elem.out_sg[i].iov_base,
                   len);
            offset += len;
        }
    }

    bdrv_aio_write(req->dev->bs, req->out->sector, req->buffer, req->size / 512,
                   virtio_blk_rw_complete, req);
    return 0;
}

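/*
 * Main request handler, called when the guest kicks the virtqueue.  As the
 * parsing below implies, each request is laid out in the descriptor chain
 * roughly as:
 *
 *   out_sg[0]              : struct virtio_blk_outhdr (type, sector, ...)
 *   out_sg[1..out_num-1]   : write payload (for VIRTIO_BLK_T_OUT)
 *   in_sg[0..in_num-2]     : read payload (for reads)
 *   in_sg[in_num-1]        : struct virtio_blk_inhdr (status byte)
 *
 * SCSI passthrough commands are rejected with VIRTIO_BLK_S_UNSUPP, writes go
 * through virtio_blk_handle_write(), and everything else is treated as a
 * read into a freshly allocated bounce buffer.
 */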
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    VirtIOBlockReq *req;

    while ((req = virtio_blk_get_request(s))) {
        int i;

        if (req->elem.out_num < 1 || req->elem.in_num < 1) {
            fprintf(stderr, "virtio-blk missing headers\n");
            exit(1);
        }

        if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
            req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
            fprintf(stderr, "virtio-blk header not in correct element\n");
            exit(1);
        }

        req->out = (void *)req->elem.out_sg[0].iov_base;
        req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;

        if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
            unsigned int len = sizeof(*req->in);

            req->in->status = VIRTIO_BLK_S_UNSUPP;
            virtqueue_push(vq, &req->elem, len);
            virtio_notify(vdev, vq);
            qemu_free(req);
        } else if (req->out->type & VIRTIO_BLK_T_OUT) {
            if (virtio_blk_handle_write(req) < 0)
                break;
        } else {
            for (i = 0; i < req->elem.in_num - 1; i++)
                req->size += req->elem.in_sg[i].iov_len;

            req->buffer = qemu_memalign(512, req->size);
            if (req->buffer == NULL) {
                qemu_free(req);
                break;
            }

            bdrv_aio_read(s->bs, req->out->sector,
                          req->buffer,
                          req->size / 512,
                          virtio_blk_rw_complete,
                          req);
        }
    }
    /*
     * FIXME: Want to check for completions before returning to guest mode,
     * so cached reads and writes are reported as quickly as possible.  But
     * that should be done in the generic block layer.
     */
}

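/*
 * VM state change handler.  When the VM resumes after having been stopped by
 * a write error, reissue every request that was parked on s->rq.
 */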
static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req = s->rq;

    if (!running)
        return;

    s->rq = NULL;

    while (req) {
        virtio_blk_handle_write(req);
        req = req->next;
    }
}

static void virtio_blk_reset(VirtIODevice *vdev)
{
    /*
     * This should cancel pending requests, but can't do nicely until there
     * are per-device request lists.
     */
    qemu_aio_flush();
}

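/*
 * Fill in the guest-visible config space: capacity in 512-byte sectors plus
 * the geometry hints.  seg_max is set to 128 - 2, presumably leaving room in
 * the 128-entry virtqueue (see virtio_blk_init) for the request header and
 * status descriptors alongside the data segments.
 */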
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIOBlock *s = to_virtio_blk(vdev);
    struct virtio_blk_config blkcfg;
    uint64_t capacity;
    int cylinders, heads, secs;

    bdrv_get_geometry(s->bs, &capacity);
    bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
    stq_raw(&blkcfg.capacity, capacity);
    stl_raw(&blkcfg.seg_max, 128 - 2);
    stw_raw(&blkcfg.cylinders, cylinders);
    blkcfg.heads = heads;
    blkcfg.sectors = secs;
    memcpy(config, &blkcfg, sizeof(blkcfg));
}

static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
{
    return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY);
}

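/*
 * Migration/savevm: after the generic virtio state, each parked request is
 * written as a 1 byte followed by its raw VirtQueueElement, terminated by a
 * 0 byte.  virtio_blk_load() below rebuilds the s->rq list from this stream.
 */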
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
    VirtIOBlock *s = opaque;
    VirtIOBlockReq *req = s->rq;

    virtio_save(&s->vdev, f);

    while (req) {
        qemu_put_sbyte(f, 1);
        qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
        req = req->next;
    }
    qemu_put_sbyte(f, 0);
}

static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBlock *s = opaque;

    if (version_id != 2)
        return -EINVAL;

    virtio_load(&s->vdev, f);
    while (qemu_get_sbyte(f)) {
        VirtIOBlockReq *req = virtio_blk_alloc_request(s);
        qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
        /* Chain the restored request onto the parked-request list. */
        req->next = s->rq;
        s->rq = req;
    }

    return 0;
}

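/*
 * Device creation: register the virtio-blk PCI device, hook up the config,
 * feature and reset callbacks, create the single 128-entry request
 * virtqueue, and register the VM state change handler and savevm handlers
 * (version 2) used for the parked-request list above.
 */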
void *virtio_blk_init(PCIBus *bus, BlockDriverState *bs)
{
    VirtIOBlock *s;
    int cylinders, heads, secs;
    static int virtio_blk_id;

    s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk",
                                       PCI_VENDOR_ID_REDHAT_QUMRANET,
                                       PCI_DEVICE_ID_VIRTIO_BLOCK,
                                       PCI_VENDOR_ID_REDHAT_QUMRANET,
                                       VIRTIO_ID_BLOCK,
                                       PCI_CLASS_STORAGE_OTHER, 0x00,
                                       sizeof(struct virtio_blk_config),
                                       sizeof(VirtIOBlock));
    if (!s)
        return NULL;

    s->vdev.get_config = virtio_blk_update_config;
    s->vdev.get_features = virtio_blk_get_features;
    s->vdev.reset = virtio_blk_reset;
    s->bs = bs;
    s->rq = NULL;
    bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
    bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);

    s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);

    qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
    register_savevm("virtio-blk", virtio_blk_id++, 2,
                    virtio_blk_save, virtio_blk_load, s);

    return s;
}