Statistics
| Branch: | Revision:

root / hw / dataplane / vring.c @ 0d09e41a

History | View | Annotate | Download (11.7 kB)

1
/* Copyright 2012 Red Hat, Inc.
2
 * Copyright IBM, Corp. 2012
3
 *
4
 * Based on Linux 2.6.39 vhost code:
5
 * Copyright (C) 2009 Red Hat, Inc.
6
 * Copyright (C) 2006 Rusty Russell IBM Corporation
7
 *
8
 * Author: Michael S. Tsirkin <mst@redhat.com>
9
 *         Stefan Hajnoczi <stefanha@redhat.com>
10
 *
11
 * Inspiration, some code, and most witty comments come from
12
 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
13
 *
14
 * This work is licensed under the terms of the GNU GPL, version 2.
15
 */
16

    
17
#include "trace.h"
18
#include "hw/virtio/dataplane/vring.h"
19
#include "qemu/error-report.h"
20

    
21
/* Map the guest's vring to host memory */
22
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
23
{
24
    hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
25
    hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
26
    void *vring_ptr;
27

    
28
    vring->broken = false;
29

    
30
    hostmem_init(&vring->hostmem);
31
    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
32
    if (!vring_ptr) {
33
        error_report("Failed to map vring "
34
                     "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
35
                     vring_addr, vring_size);
36
        vring->broken = true;
37
        return false;
38
    }
39

    
40
    vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
41

    
42
    vring->last_avail_idx = 0;
43
    vring->last_used_idx = 0;
44
    vring->signalled_used = 0;
45
    vring->signalled_used_valid = false;
46

    
47
    trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
48
                      vring->vr.desc, vring->vr.avail, vring->vr.used);
49
    return true;
50
}
51

    
52
/* Release the host memory mapping state held by vring.
 *
 * This only finalizes vring->hostmem; no other field of the vring is
 * touched.
 */
void vring_teardown(Vring *vring)
{
    hostmem_finalize(&vring->hostmem);
}
56

    
57
/* Disable guest->host notifies */
58
void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
59
{
60
    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
61
        vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
62
    }
63
}
64

    
65
/* Enable guest->host notifies
66
 *
67
 * Return true if the vring is empty, false if there are more requests.
68
 */
69
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
70
{
71
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
72
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
73
    } else {
74
        vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
75
    }
76
    smp_mb(); /* ensure update is seen before reading avail_idx */
77
    return !vring_more_avail(vring);
78
}
79

    
80
/* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
81
bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
82
{
83
    uint16_t old, new;
84
    bool v;
85
    /* Flush out used index updates. This is paired
86
     * with the barrier that the Guest executes when enabling
87
     * interrupts. */
88
    smp_mb();
89

    
90
    if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
91
        unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
92
        return true;
93
    }
94

    
95
    if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
96
        return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
97
    }
98
    old = vring->signalled_used;
99
    v = vring->signalled_used_valid;
100
    new = vring->signalled_used = vring->last_used_idx;
101
    vring->signalled_used_valid = true;
102

    
103
    if (unlikely(!v)) {
104
        return true;
105
    }
106

    
107
    return vring_need_event(vring_used_event(&vring->vr), new, old);
108
}
109

    
110
/* This is stolen from linux/drivers/vhost/vhost.c. */
111
static int get_indirect(Vring *vring,
112
                        struct iovec iov[], struct iovec *iov_end,
113
                        unsigned int *out_num, unsigned int *in_num,
114
                        struct vring_desc *indirect)
115
{
116
    struct vring_desc desc;
117
    unsigned int i = 0, count, found = 0;
118

    
119
    /* Sanity check */
120
    if (unlikely(indirect->len % sizeof(desc))) {
121
        error_report("Invalid length in indirect descriptor: "
122
                     "len %#x not multiple of %#zx",
123
                     indirect->len, sizeof(desc));
124
        vring->broken = true;
125
        return -EFAULT;
126
    }
127

    
128
    count = indirect->len / sizeof(desc);
129
    /* Buffers are chained via a 16 bit next field, so
130
     * we can have at most 2^16 of these. */
131
    if (unlikely(count > USHRT_MAX + 1)) {
132
        error_report("Indirect buffer length too big: %d", indirect->len);
133
        vring->broken = true;
134
        return -EFAULT;
135
    }
136

    
137
    do {
138
        struct vring_desc *desc_ptr;
139

    
140
        /* Translate indirect descriptor */
141
        desc_ptr = hostmem_lookup(&vring->hostmem,
142
                                  indirect->addr + found * sizeof(desc),
143
                                  sizeof(desc), false);
144
        if (!desc_ptr) {
145
            error_report("Failed to map indirect descriptor "
146
                         "addr %#" PRIx64 " len %zu",
147
                         (uint64_t)indirect->addr + found * sizeof(desc),
148
                         sizeof(desc));
149
            vring->broken = true;
150
            return -EFAULT;
151
        }
152
        desc = *desc_ptr;
153

    
154
        /* Ensure descriptor has been loaded before accessing fields */
155
        barrier(); /* read_barrier_depends(); */
156

    
157
        if (unlikely(++found > count)) {
158
            error_report("Loop detected: last one at %u "
159
                         "indirect size %u", i, count);
160
            vring->broken = true;
161
            return -EFAULT;
162
        }
163

    
164
        if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
165
            error_report("Nested indirect descriptor");
166
            vring->broken = true;
167
            return -EFAULT;
168
        }
169

    
170
        /* Stop for now if there are not enough iovecs available. */
171
        if (iov >= iov_end) {
172
            return -ENOBUFS;
173
        }
174

    
175
        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
176
                                       desc.flags & VRING_DESC_F_WRITE);
177
        if (!iov->iov_base) {
178
            error_report("Failed to map indirect descriptor"
179
                         "addr %#" PRIx64 " len %u",
180
                         (uint64_t)desc.addr, desc.len);
181
            vring->broken = true;
182
            return -EFAULT;
183
        }
184
        iov->iov_len = desc.len;
185
        iov++;
186

    
187
        /* If this is an input descriptor, increment that count. */
188
        if (desc.flags & VRING_DESC_F_WRITE) {
189
            *in_num += 1;
190
        } else {
191
            /* If it's an output descriptor, they're all supposed
192
             * to come before any input descriptors. */
193
            if (unlikely(*in_num)) {
194
                error_report("Indirect descriptor "
195
                             "has out after in: idx %u", i);
196
                vring->broken = true;
197
                return -EFAULT;
198
            }
199
            *out_num += 1;
200
        }
201
        i = desc.next;
202
    } while (desc.flags & VRING_DESC_F_NEXT);
203
    return 0;
204
}
205

    
206
/* This looks in the virtqueue and for the first available buffer, and converts
207
 * it to an iovec for convenient access.  Since descriptors consist of some
208
 * number of output then some number of input descriptors, it's actually two
209
 * iovecs, but we pack them into one and note how many of each there were.
210
 *
211
 * This function returns the descriptor number found, or vq->num (which is
212
 * never a valid descriptor number) if none was found.  A negative code is
213
 * returned on error.
214
 *
215
 * Stolen from linux/drivers/vhost/vhost.c.
216
 */
217
int vring_pop(VirtIODevice *vdev, Vring *vring,
218
              struct iovec iov[], struct iovec *iov_end,
219
              unsigned int *out_num, unsigned int *in_num)
220
{
221
    struct vring_desc desc;
222
    unsigned int i, head, found = 0, num = vring->vr.num;
223
    uint16_t avail_idx, last_avail_idx;
224

    
225
    /* If there was a fatal error then refuse operation */
226
    if (vring->broken) {
227
        return -EFAULT;
228
    }
229

    
230
    /* Check it isn't doing very strange things with descriptor numbers. */
231
    last_avail_idx = vring->last_avail_idx;
232
    avail_idx = vring->vr.avail->idx;
233
    barrier(); /* load indices now and not again later */
234

    
235
    if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
236
        error_report("Guest moved used index from %u to %u",
237
                     last_avail_idx, avail_idx);
238
        vring->broken = true;
239
        return -EFAULT;
240
    }
241

    
242
    /* If there's nothing new since last we looked. */
243
    if (avail_idx == last_avail_idx) {
244
        return -EAGAIN;
245
    }
246

    
247
    /* Only get avail ring entries after they have been exposed by guest. */
248
    smp_rmb();
249

    
250
    /* Grab the next descriptor number they're advertising, and increment
251
     * the index we've seen. */
252
    head = vring->vr.avail->ring[last_avail_idx % num];
253

    
254
    /* If their number is silly, that's an error. */
255
    if (unlikely(head >= num)) {
256
        error_report("Guest says index %u > %u is available", head, num);
257
        vring->broken = true;
258
        return -EFAULT;
259
    }
260

    
261
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
262
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
263
    }
264

    
265
    /* When we start there are none of either input nor output. */
266
    *out_num = *in_num = 0;
267

    
268
    i = head;
269
    do {
270
        if (unlikely(i >= num)) {
271
            error_report("Desc index is %u > %u, head = %u", i, num, head);
272
            vring->broken = true;
273
            return -EFAULT;
274
        }
275
        if (unlikely(++found > num)) {
276
            error_report("Loop detected: last one at %u vq size %u head %u",
277
                         i, num, head);
278
            vring->broken = true;
279
            return -EFAULT;
280
        }
281
        desc = vring->vr.desc[i];
282

    
283
        /* Ensure descriptor is loaded before accessing fields */
284
        barrier();
285

    
286
        if (desc.flags & VRING_DESC_F_INDIRECT) {
287
            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
288
            if (ret < 0) {
289
                return ret;
290
            }
291
            continue;
292
        }
293

    
294
        /* If there are not enough iovecs left, stop for now.  The caller
295
         * should check if there are more descs available once they have dealt
296
         * with the current set.
297
         */
298
        if (iov >= iov_end) {
299
            return -ENOBUFS;
300
        }
301

    
302
        /* TODO handle non-contiguous memory across region boundaries */
303
        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
304
                                       desc.flags & VRING_DESC_F_WRITE);
305
        if (!iov->iov_base) {
306
            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
307
                         (uint64_t)desc.addr, desc.len);
308
            vring->broken = true;
309
            return -EFAULT;
310
        }
311
        iov->iov_len  = desc.len;
312
        iov++;
313

    
314
        if (desc.flags & VRING_DESC_F_WRITE) {
315
            /* If this is an input descriptor,
316
             * increment that count. */
317
            *in_num += 1;
318
        } else {
319
            /* If it's an output descriptor, they're all supposed
320
             * to come before any input descriptors. */
321
            if (unlikely(*in_num)) {
322
                error_report("Descriptor has out after in: idx %d", i);
323
                vring->broken = true;
324
                return -EFAULT;
325
            }
326
            *out_num += 1;
327
        }
328
        i = desc.next;
329
    } while (desc.flags & VRING_DESC_F_NEXT);
330

    
331
    /* On success, increment avail index. */
332
    vring->last_avail_idx++;
333
    return head;
334
}
335

    
336
/* After we've used one of their buffers, we tell them about it.
337
 *
338
 * Stolen from linux/drivers/vhost/vhost.c.
339
 */
340
void vring_push(Vring *vring, unsigned int head, int len)
341
{
342
    struct vring_used_elem *used;
343
    uint16_t new;
344

    
345
    /* Don't touch vring if a fatal error occurred */
346
    if (vring->broken) {
347
        return;
348
    }
349

    
350
    /* The virtqueue contains a ring of used buffers.  Get a pointer to the
351
     * next entry in that used ring. */
352
    used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
353
    used->id = head;
354
    used->len = len;
355

    
356
    /* Make sure buffer is written before we update index. */
357
    smp_wmb();
358

    
359
    new = vring->vr.used->idx = ++vring->last_used_idx;
360
    if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
361
        vring->signalled_used_valid = false;
362
    }
363
}