root / hw / virtio.c @ d8c6d07f


/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu/error-report.h"
#include "virtio.h"
#include "qemu/atomic.h"
#include "virtio-bus.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether that signalled_used value is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
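/* The three parts of a virtqueue live in one guest-contiguous region
 * starting at vq->pa: the descriptor table, then the avail ring, then the
 * used ring aligned up to VIRTIO_PCI_VRING_ALIGN.  As an illustration, for
 * vring.num = 256 the descriptor table takes 256 * 16 = 4096 bytes, the
 * avail ring ends 4 + 2 * 256 bytes later, and the used ring therefore
 * starts at pa + 8192. */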
static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}

static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
{
    hwaddr pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

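/* Enable or disable guest->host notifications for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX the avail event index is published instead of
 * toggling the VRING_USED_F_NO_NOTIFY flag. */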
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

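/* Completion is split in two steps: virtqueue_fill() writes an element
 * into the used ring ('idx' is an offset from the current used index, so
 * several completions can be staged out of order), and virtqueue_flush()
 * publishes 'count' of them by advancing the used index.  virtqueue_push()
 * below combines both for the common single-buffer case. */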
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(hwaddr desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

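/* Walk the available descriptor chains without consuming them and report
 * how many device-writable (in) and device-readable (out) bytes are queued,
 * stopping early once both max_in_bytes and max_out_bytes are satisfied. */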
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                in_total += vring_desc_len(desc_pa, i);
            } else {
                out_total += vring_desc_len(desc_pa, i);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

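/* Map a guest-physical scatter/gather list into host virtual addresses.
 * Each element must be mappable in full; anything else (such as an attempt
 * to map MMIO memory) is treated as a fatal guest error. */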
void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
    size_t num_sg, int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}

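/* Fetch the next available descriptor chain (direct or indirect), map its
 * buffers into elem->in_sg/elem->out_sg and return the number of
 * descriptors collected, or 0 if the queue is empty. */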
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (vdev->binding->notify) {
        vdev->binding->notify(vdev->binding_opaque, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    trace_virtio_set_status(vdev, val);

    if (vdev->set_status) {
        vdev->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    virtio_set_status(vdev, 0);

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stb_p(vdev->config + addr, val);

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stw_p(vdev->config + addr, val);

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    stl_p(vdev->config + addr, val);

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
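/* For example, with event = 2, old = 2 and new = 3 this evaluates
 * (uint16_t)(3 - 2 - 1) = 0 < (uint16_t)(3 - 2) = 1, so an event is
 * triggered; the unsigned arithmetic keeps the comparison correct when
 * the 16-bit indexes wrap around. */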
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
        /* Note: Xen has similar logic for notification hold-off
         * in include/xen/interface/io/ring.h with req_event and req_prod
         * corresponding to event_idx + 1 and new respectively.
         * Note also that req_event and req_prod in Xen start at 1,
         * event indexes in virtio start at 0. */
        return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}

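/* Decide whether the guest should be interrupted: always when the queue is
 * empty and VIRTIO_F_NOTIFY_ON_EMPTY was negotiated, otherwise honour
 * either the legacy VRING_AVAIL_F_NO_INTERRUPT flag or, with
 * VIRTIO_RING_F_EVENT_IDX, the used_event index. */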
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if the guest acknowledged
     * the VIRTIO_F_NOTIFY_ON_EMPTY feature). */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

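/* Migration support: save the common device state (status, ISR, queue
 * selector, negotiated features, config space) followed by the number of
 * in-use queues and, for each of them, its size, ring address and
 * last_avail_idx; binding-specific state is handled by the binding's
 * save_config/save_queue callbacks. */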
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    if (vdev->binding->save_config)
        vdev->binding->save_config(vdev->binding_opaque, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (vdev->binding->save_queue)
            vdev->binding->save_queue(vdev->binding_opaque, i, f);
    }
}

int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    uint32_t supported_features =
        vdev->binding->get_features(vdev->binding_opaque);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (vdev->set_features) {
        vdev->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = vdev->binding->get_features(vdev->binding_opaque);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_common_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
}

void virtio_cleanup(VirtIODevice *vdev)
{
    virtio_common_cleanup(vdev);
    g_free(vdev);
}

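/* Run state change handler: re-propagate the device status to the backend
 * when the guest starts or stops running, and give the transport binding a
 * chance to react to the change. */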
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    int i;
    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
}

VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    vdev = g_malloc0(struct_size);
    virtio_init(vdev, name, device_id, config_size);
    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        DeviceState *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
            virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

static int virtio_device_init(DeviceState *qdev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
    assert(k->init != NULL);
    if (k->init(vdev) < 0) {
        return -1;
    }
    virtio_bus_plug_device(vdev);
    return 0;
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);
    dc->init = virtio_device_init;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)