root / hw / dataplane / vring.c @ 0d09e41a
History | View | Annotate | Download (11.7 kB)
1 |
/* Copyright 2012 Red Hat, Inc.
|
---|---|
2 |
* Copyright IBM, Corp. 2012
|
3 |
*
|
4 |
* Based on Linux 2.6.39 vhost code:
|
5 |
* Copyright (C) 2009 Red Hat, Inc.
|
6 |
* Copyright (C) 2006 Rusty Russell IBM Corporation
|
7 |
*
|
8 |
* Author: Michael S. Tsirkin <mst@redhat.com>
|
9 |
* Stefan Hajnoczi <stefanha@redhat.com>
|
10 |
*
|
11 |
* Inspiration, some code, and most witty comments come from
|
12 |
* Documentation/virtual/lguest/lguest.c, by Rusty Russell
|
13 |
*
|
14 |
* This work is licensed under the terms of the GNU GPL, version 2.
|
15 |
*/
|
16 |
|
17 |
#include "trace.h" |
18 |
#include "hw/virtio/dataplane/vring.h" |
19 |
#include "qemu/error-report.h" |
20 |
|
21 |
/* Map the guest's vring to host memory.
 *
 * Returns true on success.  On failure an error is reported, the vring is
 * marked broken, and false is returned.
 *
 * @vring: vring state to initialize
 * @vdev:  device owning virtqueue @n
 * @n:     virtqueue index
 */
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
{
    hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
    hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
    void *vring_ptr;

    vring->broken = false;

    hostmem_init(&vring->hostmem);
    /* Map the entire ring read/write; it must be contiguous in host memory */
    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
    if (!vring_ptr) {
        error_report("Failed to map vring "
                     "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
                     vring_addr, vring_size);
        vring->broken = true;
        /* NOTE(review): hostmem stays initialized on this path — presumably
         * the caller is still expected to call vring_teardown(); confirm. */
        return false;
    }

    /* 4096 is the vring alignment passed to vring_init() — assumed guest
     * page size; TODO confirm for non-4K-page targets */
    vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);

    /* Start with a clean shadow state for avail/used tracking */
    vring->last_avail_idx = 0;
    vring->last_used_idx = 0;
    vring->signalled_used = 0;
    vring->signalled_used_valid = false;

    trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
                      vring->vr.desc, vring->vr.avail, vring->vr.used);
    return true;
}
51 |
|
52 |
/* Release the host memory mapping established by vring_setup() */
void vring_teardown(Vring *vring)
{
    hostmem_finalize(&vring->hostmem);
}
56 |
|
57 |
/* Disable guest->host notifies */
|
58 |
void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
|
59 |
{ |
60 |
if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) { |
61 |
vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY; |
62 |
} |
63 |
} |
64 |
|
65 |
/* Enable guest->host notifies
|
66 |
*
|
67 |
* Return true if the vring is empty, false if there are more requests.
|
68 |
*/
|
69 |
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
|
70 |
{ |
71 |
if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) { |
72 |
vring_avail_event(&vring->vr) = vring->vr.avail->idx; |
73 |
} else {
|
74 |
vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY; |
75 |
} |
76 |
smp_mb(); /* ensure update is seen before reading avail_idx */
|
77 |
return !vring_more_avail(vring);
|
78 |
} |
79 |
|
80 |
/* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
|
81 |
bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
|
82 |
{ |
83 |
uint16_t old, new; |
84 |
bool v;
|
85 |
/* Flush out used index updates. This is paired
|
86 |
* with the barrier that the Guest executes when enabling
|
87 |
* interrupts. */
|
88 |
smp_mb(); |
89 |
|
90 |
if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
|
91 |
unlikely(vring->vr.avail->idx == vring->last_avail_idx)) { |
92 |
return true; |
93 |
} |
94 |
|
95 |
if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
|
96 |
return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
|
97 |
} |
98 |
old = vring->signalled_used; |
99 |
v = vring->signalled_used_valid; |
100 |
new = vring->signalled_used = vring->last_used_idx; |
101 |
vring->signalled_used_valid = true;
|
102 |
|
103 |
if (unlikely(!v)) {
|
104 |
return true; |
105 |
} |
106 |
|
107 |
return vring_need_event(vring_used_event(&vring->vr), new, old);
|
108 |
} |
109 |
|
110 |
/* This is stolen from linux/drivers/vhost/vhost.c. */
|
111 |
static int get_indirect(Vring *vring, |
112 |
struct iovec iov[], struct iovec *iov_end, |
113 |
unsigned int *out_num, unsigned int *in_num, |
114 |
struct vring_desc *indirect)
|
115 |
{ |
116 |
struct vring_desc desc;
|
117 |
unsigned int i = 0, count, found = 0; |
118 |
|
119 |
/* Sanity check */
|
120 |
if (unlikely(indirect->len % sizeof(desc))) { |
121 |
error_report("Invalid length in indirect descriptor: "
|
122 |
"len %#x not multiple of %#zx",
|
123 |
indirect->len, sizeof(desc));
|
124 |
vring->broken = true;
|
125 |
return -EFAULT;
|
126 |
} |
127 |
|
128 |
count = indirect->len / sizeof(desc);
|
129 |
/* Buffers are chained via a 16 bit next field, so
|
130 |
* we can have at most 2^16 of these. */
|
131 |
if (unlikely(count > USHRT_MAX + 1)) { |
132 |
error_report("Indirect buffer length too big: %d", indirect->len);
|
133 |
vring->broken = true;
|
134 |
return -EFAULT;
|
135 |
} |
136 |
|
137 |
do {
|
138 |
struct vring_desc *desc_ptr;
|
139 |
|
140 |
/* Translate indirect descriptor */
|
141 |
desc_ptr = hostmem_lookup(&vring->hostmem, |
142 |
indirect->addr + found * sizeof(desc),
|
143 |
sizeof(desc), false); |
144 |
if (!desc_ptr) {
|
145 |
error_report("Failed to map indirect descriptor "
|
146 |
"addr %#" PRIx64 " len %zu", |
147 |
(uint64_t)indirect->addr + found * sizeof(desc),
|
148 |
sizeof(desc));
|
149 |
vring->broken = true;
|
150 |
return -EFAULT;
|
151 |
} |
152 |
desc = *desc_ptr; |
153 |
|
154 |
/* Ensure descriptor has been loaded before accessing fields */
|
155 |
barrier(); /* read_barrier_depends(); */
|
156 |
|
157 |
if (unlikely(++found > count)) {
|
158 |
error_report("Loop detected: last one at %u "
|
159 |
"indirect size %u", i, count);
|
160 |
vring->broken = true;
|
161 |
return -EFAULT;
|
162 |
} |
163 |
|
164 |
if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
|
165 |
error_report("Nested indirect descriptor");
|
166 |
vring->broken = true;
|
167 |
return -EFAULT;
|
168 |
} |
169 |
|
170 |
/* Stop for now if there are not enough iovecs available. */
|
171 |
if (iov >= iov_end) {
|
172 |
return -ENOBUFS;
|
173 |
} |
174 |
|
175 |
iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len, |
176 |
desc.flags & VRING_DESC_F_WRITE); |
177 |
if (!iov->iov_base) {
|
178 |
error_report("Failed to map indirect descriptor"
|
179 |
"addr %#" PRIx64 " len %u", |
180 |
(uint64_t)desc.addr, desc.len); |
181 |
vring->broken = true;
|
182 |
return -EFAULT;
|
183 |
} |
184 |
iov->iov_len = desc.len; |
185 |
iov++; |
186 |
|
187 |
/* If this is an input descriptor, increment that count. */
|
188 |
if (desc.flags & VRING_DESC_F_WRITE) {
|
189 |
*in_num += 1;
|
190 |
} else {
|
191 |
/* If it's an output descriptor, they're all supposed
|
192 |
* to come before any input descriptors. */
|
193 |
if (unlikely(*in_num)) {
|
194 |
error_report("Indirect descriptor "
|
195 |
"has out after in: idx %u", i);
|
196 |
vring->broken = true;
|
197 |
return -EFAULT;
|
198 |
} |
199 |
*out_num += 1;
|
200 |
} |
201 |
i = desc.next; |
202 |
} while (desc.flags & VRING_DESC_F_NEXT);
|
203 |
return 0; |
204 |
} |
205 |
|
206 |
/* This looks in the virtqueue and for the first available buffer, and converts
 * it to an iovec for convenient access.  Since descriptors consist of some
 * number of output then some number of input descriptors, it's actually two
 * iovecs, but we pack them into one and note how many of each there were.
 *
 * This function returns the descriptor number found, or vq->num (which is
 * never a valid descriptor number) if none was found.  A negative code is
 * returned on error:
 *   -EFAULT  fatal guest error, vring marked broken
 *   -EAGAIN  nothing new available
 *   -ENOBUFS iov[] exhausted; caller should retry after processing
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
int vring_pop(VirtIODevice *vdev, Vring *vring,
              struct iovec iov[], struct iovec *iov_end,
              unsigned int *out_num, unsigned int *in_num)
{
    struct vring_desc desc;
    unsigned int i, head, found = 0, num = vring->vr.num;
    uint16_t avail_idx, last_avail_idx;

    /* If there was a fatal error then refuse operation */
    if (vring->broken) {
        return -EFAULT;
    }

    /* Check it isn't doing very strange things with descriptor numbers. */
    last_avail_idx = vring->last_avail_idx;
    avail_idx = vring->vr.avail->idx;
    barrier(); /* load indices now and not again later */

    /* uint16_t subtraction handles index wrap-around correctly */
    if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
        /* NOTE(review): message says "used index" but this checks the
         * avail index — wording inherited from vhost */
        error_report("Guest moved used index from %u to %u",
                     last_avail_idx, avail_idx);
        vring->broken = true;
        return -EFAULT;
    }

    /* If there's nothing new since last we looked. */
    if (avail_idx == last_avail_idx) {
        return -EAGAIN;
    }

    /* Only get avail ring entries after they have been exposed by guest. */
    smp_rmb();

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring->vr.avail->ring[last_avail_idx % num];

    /* If their number is silly, that's an error. */
    if (unlikely(head >= num)) {
        error_report("Guest says index %u > %u is available", head, num);
        vring->broken = true;
        return -EFAULT;
    }

    /* With event index, tell the guest which avail idx we have consumed up
     * to, so it can suppress redundant notifications */
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
    }

    /* When we start there are none of either input nor output. */
    *out_num = *in_num = 0;

    i = head;
    do {
        if (unlikely(i >= num)) {
            error_report("Desc index is %u > %u, head = %u", i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        /* Guard against a next-field cycle crafted by the guest */
        if (unlikely(++found > num)) {
            error_report("Loop detected: last one at %u vq size %u head %u",
                         i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        /* Copy the descriptor out of guest-shared memory before use */
        desc = vring->vr.desc[i];

        /* Ensure descriptor is loaded before accessing fields */
        barrier();

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
            if (ret < 0) {
                return ret;
            }
            /* NOTE(review): iov is not advanced past the entries
             * get_indirect() filled; if the guest set both INDIRECT and
             * NEXT, subsequent descriptors would overwrite them — TODO
             * confirm whether that combination is rejected elsewhere */
            continue;
        }

        /* If there are not enough iovecs left, stop for now.  The caller
         * should check if there are more descs available once they have dealt
         * with the current set.
         */
        if (iov >= iov_end) {
            return -ENOBUFS;
        }

        /* TODO handle non-contiguous memory across region boundaries */
        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
                                       desc.flags & VRING_DESC_F_WRITE);
        if (!iov->iov_base) {
            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
                         (uint64_t)desc.addr, desc.len);
            vring->broken = true;
            return -EFAULT;
        }
        iov->iov_len = desc.len;
        iov++;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* If this is an input descriptor,
             * increment that count. */
            *in_num += 1;
        } else {
            /* If it's an output descriptor, they're all supposed
             * to come before any input descriptors. */
            if (unlikely(*in_num)) {
                error_report("Descriptor has out after in: idx %d", i);
                vring->broken = true;
                return -EFAULT;
            }
            *out_num += 1;
        }
        i = desc.next;
    } while (desc.flags & VRING_DESC_F_NEXT);

    /* On success, increment avail index. */
    vring->last_avail_idx++;
    return head;
}
335 |
|
336 |
/* After we've used one of their buffers, we tell them about it.
 *
 * Publishes {head, len} into the used ring and bumps the used index.
 * Whether the guest must then be interrupted is decided separately by
 * vring_should_notify().
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
void vring_push(Vring *vring, unsigned int head, int len)
{
    struct vring_used_elem *used;
    uint16_t new;

    /* Don't touch vring if a fatal error occurred */
    if (vring->broken) {
        return;
    }

    /* The virtqueue contains a ring of used buffers.  Get a pointer to the
     * next entry in that used ring. */
    used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
    used->id = head;
    used->len = len;

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    new = vring->vr.used->idx = ++vring->last_used_idx;
    /* Signed-difference trick (from vhost): if the used index has wrapped
     * past signalled_used, that snapshot is stale — invalidate it so the
     * next vring_should_notify() forces a notification */
    if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
        vring->signalled_used_valid = false;
    }
}