Statistics
| Branch: | Revision:

root / hw / virtio.c @ 9bfa659e

History | View | Annotate | Download (27.6 kB)

1
/*
2
 * Virtio Support
3
 *
4
 * Copyright IBM, Corp. 2007
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 */
13

    
14
#include <inttypes.h>
15

    
16
#include "trace.h"
17
#include "qemu-error.h"
18
#include "virtio.h"
19
#include "qemu-barrier.h"
20

    
21
/* The alignment to use between consumer and producer parts of vring.
22
 * x86 pagesize again. */
23
#define VIRTIO_PCI_VRING_ALIGN         4096
24

    
25
/* One entry of the vring descriptor table (16 bytes). */
typedef struct VRingDesc {
    uint64_t addr;   /* physical address of the buffer */
    uint32_t len;    /* buffer length */
    uint16_t flags;  /* VRING_DESC_F_* bits (NEXT, WRITE, INDIRECT) */
    uint16_t next;   /* index of the chained descriptor when F_NEXT is set */
} VRingDesc;
32

    
33
/* Header of the avail ring; ring[] entries trail the two 16-bit fields. */
typedef struct VRingAvail {
    uint16_t flags;    /* tested against VRING_AVAIL_F_NO_INTERRUPT */
    uint16_t idx;      /* compared with last_avail_idx to find new heads */
    uint16_t ring[0];  /* descriptor head indices */
} VRingAvail;
39

    
40
/* One entry of the used ring: which descriptor head, and a length. */
typedef struct VRingUsedElem {
    uint32_t id;   /* head index of the completed element */
    uint32_t len;  /* length reported for the element */
} VRingUsedElem;
45

    
46
typedef struct VRingUsed
47
{
48
    uint16_t flags;
49
    uint16_t idx;
50
    VRingUsedElem ring[0];
51
} VRingUsed;
52

    
53
typedef struct VRing
54
{
55
    unsigned int num;
56
    target_phys_addr_t desc;
57
    target_phys_addr_t avail;
58
    target_phys_addr_t used;
59
} VRing;
60

    
61
struct VirtQueue
62
{
63
    VRing vring;
64
    target_phys_addr_t pa;
65
    uint16_t last_avail_idx;
66
    /* Last used index value we have signalled on */
67
    uint16_t signalled_used;
68

    
69
    /* Last used index value we have signalled on */
70
    bool signalled_used_valid;
71

    
72
    /* Notification enabled? */
73
    bool notification;
74

    
75
    int inuse;
76

    
77
    uint16_t vector;
78
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
79
    VirtIODevice *vdev;
80
    EventNotifier guest_notifier;
81
    EventNotifier host_notifier;
82
};
83

    
84
/* virt queue functions */
85
/*
 * Derive the addresses of the three vring regions from vq->pa: the
 * descriptor table starts at pa, the avail ring follows vring.num
 * descriptors, and the used ring address is the end of the avail ring
 * entries passed through vring_align() with VIRTIO_PCI_VRING_ALIGN.
 */
static void virtqueue_init(VirtQueue *vq)
{
    VRing *ring = &vq->vring;
    const target_phys_addr_t base = vq->pa;

    ring->desc = base;
    ring->avail = base + ring->num * sizeof(VRingDesc);
    ring->used = vring_align(ring->avail +
                             offsetof(VRingAvail, ring[ring->num]),
                             VIRTIO_PCI_VRING_ALIGN);
}
95

    
96
/* Load the addr field of descriptor i from the table at desc_pa. */
static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
{
    const target_phys_addr_t field_pa =
        desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);

    return ldq_phys(field_pa);
}
102

    
103
/* Load the len field of descriptor i from the table at desc_pa. */
static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
{
    const target_phys_addr_t field_pa =
        desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);

    return ldl_phys(field_pa);
}
109

    
110
/* Load the flags field of descriptor i from the table at desc_pa. */
static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
{
    const target_phys_addr_t field_pa =
        desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);

    return lduw_phys(field_pa);
}
116

    
117
/* Load the next field of descriptor i from the table at desc_pa. */
static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
{
    const target_phys_addr_t field_pa =
        desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);

    return lduw_phys(field_pa);
}
123

    
124
/* Read the flags word of the queue's avail ring. */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    const target_phys_addr_t field_pa =
        vq->vring.avail + offsetof(VRingAvail, flags);

    return lduw_phys(field_pa);
}
130

    
131
/* Read the idx word of the queue's avail ring. */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    const target_phys_addr_t field_pa =
        vq->vring.avail + offsetof(VRingAvail, idx);

    return lduw_phys(field_pa);
}
137

    
138
/* Read entry i of the queue's avail ring. */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    const target_phys_addr_t slot_pa =
        vq->vring.avail + offsetof(VRingAvail, ring[i]);

    return lduw_phys(slot_pa);
}
144

    
145
/*
 * Read the used_event value, which is stored in the 16-bit slot just past
 * the last avail ring entry (index vring.num).
 */
static inline uint16_t vring_used_event(VirtQueue *vq)
{
    const unsigned int past_last = vq->vring.num;

    return vring_avail_ring(vq, past_last);
}
149

    
150
/* Store val into the id field of used ring entry i. */
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    const target_phys_addr_t field_pa =
        vq->vring.used + offsetof(VRingUsed, ring[i].id);

    stl_phys(field_pa, val);
}
156

    
157
/* Store val into the len field of used ring entry i. */
static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    const target_phys_addr_t field_pa =
        vq->vring.used + offsetof(VRingUsed, ring[i].len);

    stl_phys(field_pa, val);
}
163

    
164
/* Read the idx word of the queue's used ring. */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    const target_phys_addr_t field_pa =
        vq->vring.used + offsetof(VRingUsed, idx);

    return lduw_phys(field_pa);
}
170

    
171
/* Write val to the idx word of the queue's used ring. */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    const target_phys_addr_t field_pa =
        vq->vring.used + offsetof(VRingUsed, idx);

    stw_phys(field_pa, val);
}
177

    
178
/* Read-modify-write the used ring flags word, OR-ing in mask. */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    const target_phys_addr_t flags_pa =
        vq->vring.used + offsetof(VRingUsed, flags);

    stw_phys(flags_pa, lduw_phys(flags_pa) | mask);
}
184

    
185
/* Read-modify-write the used ring flags word, clearing the bits in mask. */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    const target_phys_addr_t flags_pa =
        vq->vring.used + offsetof(VRingUsed, flags);

    stw_phys(flags_pa, lduw_phys(flags_pa) & ~mask);
}
191

    
192
/*
 * Publish val as the avail_event value, stored in the 16-bit slot just past
 * the last used ring entry.  Nothing is written while vq->notification is
 * false.
 */
static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    if (vq->notification) {
        const target_phys_addr_t event_pa =
            vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);

        stw_phys(event_pa, val);
    }
}
201

    
202
/*
 * Record whether notifications are wanted for this queue and reflect it in
 * the ring: with VIRTIO_RING_F_EVENT_IDX negotiated the avail event is
 * refreshed to the current avail index, otherwise VRING_USED_F_NO_NOTIFY is
 * cleared (enable) or set (disable) in the used flags.
 */
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    const bool event_idx =
        (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) != 0;

    vq->notification = enable;

    if (event_idx) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (!enable) {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    }

    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
217

    
218
/* Return 1 when the queue's avail ring address is non-zero, else 0. */
int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail ? 1 : 0;
}
222

    
223
/* Return non-zero when the avail index equals last_avail_idx. */
int virtio_queue_empty(VirtQueue *vq)
{
    const uint16_t avail = vring_avail_idx(vq);

    return avail == vq->last_avail_idx;
}
227

    
228
/*
 * Unmap the buffers of a popped element and record it in the used ring
 * without publishing it (virtqueue_flush() advances the used index).
 *
 * vq:   queue the element was popped from
 * elem: element whose in_sg/out_sg mappings are released
 * len:  number of bytes written into the in_sg buffers; passed to
 *       cpu_physical_memory_unmap() as the accessed length, spread across
 *       the in_sg entries in order
 * idx:  offset from the current used index at which to store the entry
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        /*
         * Advance by what was actually accounted for this segment.  Adding
         * the full iov_len would push offset past len when len is shorter
         * than the buffers, making "len - offset" wrap around and report
         * the remaining segments as fully written.
         */
        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Fill in the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}
258

    
259
/*
 * Publish count previously filled used ring entries by advancing the used
 * index, and drop them from the in-use count.  signalled_used_valid is
 * cleared when the wrap check against signalled_used below fires.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t prev_idx, next_idx;

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);

    prev_idx = vring_used_idx(vq);
    next_idx = prev_idx + count;
    vring_used_idx_set(vq, next_idx);
    vq->inuse -= count;

    if (unlikely((int16_t)(next_idx - vq->signalled_used) <
                 (uint16_t)(next_idx - prev_idx))) {
        vq->signalled_used_valid = false;
    }
}
272

    
273
/* Complete a single element: fill it at used-index offset 0, then flush 1. */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    const unsigned int used_offset = 0;
    const unsigned int one_entry = 1;

    virtqueue_fill(vq, elem, len, used_offset);
    virtqueue_flush(vq, one_entry);
}
279

    
280
/*
 * Return how many avail ring entries lie between idx and the current avail
 * index (16-bit wrapping difference).  Exits the process if that count
 * exceeds the ring size.
 */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    const uint16_t pending = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (pending > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }

    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (pending != 0) {
        smp_rmb();
    }

    return pending;
}
298

    
299
/*
 * Fetch the descriptor head stored in avail ring slot (idx modulo ring
 * size).  Exits the process if the value is not a valid descriptor index.
 */
static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    const unsigned int slot = idx % vq->vring.num;
    const unsigned int head = vring_avail_ring(vq, slot);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}
315

    
316
/*
 * Follow the chain from descriptor i in the table at desc_pa.  Returns max
 * when descriptor i has no VRING_DESC_F_NEXT flag, otherwise the index of
 * the next descriptor.  Exits the process if that index is >= max.
 */
static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int following;
    const uint16_t flags = vring_desc_flags(desc_pa, i);

    /* If this descriptor says it doesn't chain, we're done. */
    if ((flags & VRING_DESC_F_NEXT) == 0) {
        return max;
    }

    following = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    /* Check they're not leading us off end of descriptors. */
    if (following >= max) {
        error_report("Desc next is %u", following);
        exit(1);
    }

    return following;
}
337

    
338
/*
 * Walk the not-yet-consumed avail ring entries, starting at last_avail_idx
 * (which is not modified), and add up descriptor lengths.
 *
 * in_bytes:  if > 0, return 1 as soon as the summed length of descriptors
 *            flagged VRING_DESC_F_WRITE reaches this value
 * out_bytes: if > 0, return 1 as soon as the summed length of descriptors
 *            without VRING_DESC_F_WRITE reaches this value
 *
 * Returns 0 when all available chains were walked without reaching either
 * threshold.  Exits the process on a malformed indirect table or a
 * descriptor loop.
 */
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        target_phys_addr_t desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            /*
             * Fetch the table address from the indirect descriptor itself
             * before resetting i; doing it afterwards would read the addr
             * field of descriptor 0 instead.
             */
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes) {
                    return 1;
                }
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes) {
                    return 1;
                }
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        /* An indirect chain counts as one buffer of the main ring. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }

    return 0;
}
401

    
402
/*
 * Map each of the num_sg physical addresses in addr[] for the length held
 * in the matching sg[] entry and store the resulting pointer in iov_base.
 * Exits the process if a mapping fails or comes back shorter than asked.
 */
void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
    size_t num_sg, int is_write)
{
    unsigned int n;

    for (n = 0; n < num_sg; n++) {
        struct iovec *entry = &sg[n];
        target_phys_addr_t mapped_len = entry->iov_len;

        entry->iov_base = cpu_physical_memory_map(addr[n], &mapped_len,
                                                  is_write);
        if (entry->iov_base == NULL || mapped_len != entry->iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}
417

    
418
/*
 * Take the next available element off the queue.
 *
 * Collects the descriptor chain (following an indirect table if the head
 * descriptor is flagged VRING_DESC_F_INDIRECT) into elem's in/out arrays,
 * maps the buffers, and increments vq->inuse.
 *
 * Returns 0 when nothing is available, otherwise in_num + out_num.
 * Exits the process on malformed descriptors.
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    target_phys_addr_t table_pa = vq->vring.desc;
    unsigned int cur, head, limit;

    if (virtqueue_num_heads(vq, vq->last_avail_idx) == 0) {
        return 0;
    }

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    limit = vq->vring.num;

    head = virtqueue_get_head(vq, vq->last_avail_idx++);
    cur = head;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(table_pa, cur) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(table_pa, cur) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* Switch to walking the indirect descriptor table from entry 0. */
        limit = vring_desc_len(table_pa, cur) / sizeof(VRingDesc);
        table_pa = vring_desc_addr(table_pa, cur);
        cur = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(table_pa, cur) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(table_pa, cur);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(table_pa, cur);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(table_pa, cur);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > limit) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((cur = virtqueue_next_desc(table_pa, cur, limit)) != limit);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
488

    
489
/* virtio device */
490
/* Invoke the binding's notify callback for vector, if one is installed. */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (!vdev->binding->notify) {
        return;
    }

    vdev->binding->notify(vdev->binding_opaque, vector);
}
496

    
497
/* Call the binding's notify hook with VIRTIO_NO_VECTOR. */
void virtio_update_irq(VirtIODevice *vdev)
{
    const uint16_t vector = VIRTIO_NO_VECTOR;

    virtio_notify_vector(vdev, vector);
}
501

    
502
/*
 * Update the device status byte.  The device's set_status hook, when
 * present, runs first and therefore still sees the old vdev->status.
 */
void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    trace_virtio_set_status(vdev, val);

    if (vdev->set_status != NULL) {
        vdev->set_status(vdev, val);
    }

    vdev->status = val;
}
511

    
512
void virtio_reset(void *opaque)
513
{
514
    VirtIODevice *vdev = opaque;
515
    int i;
516

    
517
    virtio_set_status(vdev, 0);
518

    
519
    if (vdev->reset)
520
        vdev->reset(vdev);
521

    
522
    vdev->guest_features = 0;
523
    vdev->queue_sel = 0;
524
    vdev->status = 0;
525
    vdev->isr = 0;
526
    vdev->config_vector = VIRTIO_NO_VECTOR;
527
    virtio_notify_vector(vdev, vdev->config_vector);
528

    
529
    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
530
        vdev->vq[i].vring.desc = 0;
531
        vdev->vq[i].vring.avail = 0;
532
        vdev->vq[i].vring.used = 0;
533
        vdev->vq[i].last_avail_idx = 0;
534
        vdev->vq[i].pa = 0;
535
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
536
        vdev->vq[i].signalled_used = 0;
537
        vdev->vq[i].signalled_used_valid = false;
538
        vdev->vq[i].notification = true;
539
    }
540
}
541

    
542
/*
 * Read one byte of device config space at offset addr.
 *
 * The device's get_config hook refreshes vdev->config first.  Returns the
 * byte zero-extended, or (uint32_t)-1 when the access does not fit inside
 * config_len.
 */
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    val = ldub_p(vdev->config + addr);
    return val;
}
554

    
555
/*
 * Read a 16-bit value from device config space at offset addr.
 *
 * The device's get_config hook refreshes vdev->config first.  Returns the
 * value zero-extended, or (uint32_t)-1 when the access does not fit inside
 * config_len.
 */
uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    val = lduw_p(vdev->config + addr);
    return val;
}
567

    
568
/*
 * Read a 32-bit value from device config space at offset addr.
 *
 * The device's get_config hook refreshes vdev->config first.  Returns the
 * value, or (uint32_t)-1 when the access does not fit inside config_len.
 */
uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    val = ldl_p(vdev->config + addr);
    return val;
}
580

    
581
/*
 * Write the low byte of data to device config space at offset addr, then
 * call the device's set_config hook if it has one.  Accesses that do not
 * fit inside config_len are silently ignored.
 */
void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (vdev->set_config) {
        vdev->set_config(vdev, vdev->config);
    }
}
593

    
594
/*
 * Write the low 16 bits of data to device config space at offset addr,
 * then call the device's set_config hook if it has one.  Accesses that do
 * not fit inside config_len are silently ignored.
 */
void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (vdev->set_config) {
        vdev->set_config(vdev, vdev->config);
    }
}
606

    
607
/*
 * Write the 32-bit data to device config space at offset addr, then call
 * the device's set_config hook if it has one.  Accesses that do not fit
 * inside config_len are silently ignored.
 */
void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    /*
     * Test config_len against the access size first: subtracting from a
     * config_len smaller than sizeof(val) would wrap around and let every
     * addr through.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (vdev->set_config) {
        vdev->set_config(vdev, vdev->config);
    }
}
619

    
620
/* Set queue n's ring base address and recompute its region addresses. */
void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
    VirtQueue *vq = &vdev->vq[n];

    vq->pa = addr;
    virtqueue_init(vq);
}
625

    
626
/* Return the ring base address stored for queue n. */
target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->pa;
}
630

    
631
/* Return the ring size (vring.num) of queue n. */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.num;
}
635

    
636
/* Return vq's index within its device's queue array (asserted in range). */
int virtio_queue_get_id(VirtQueue *vq)
{
    VirtQueue *first = &vq->vdev->vq[0];
    VirtQueue *end = &vq->vdev->vq[VIRTIO_PCI_QUEUE_MAX];

    assert(vq >= first && vq < end);
    return vq - first;
}
642

    
643
/*
 * Run the queue's handle_output callback.  Does nothing while the queue's
 * descriptor table address is still zero.
 */
void virtio_queue_notify_vq(VirtQueue *vq)
{
    VirtIODevice *vdev;

    if (!vq->vring.desc) {
        return;
    }

    vdev = vq->vdev;
    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    vq->handle_output(vdev, vq);
}
651

    
652
/* Notify queue number n of vdev; see virtio_queue_notify_vq(). */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    virtio_queue_notify_vq(vq);
}
656

    
657
/*
 * Return the vector assigned to queue n, or VIRTIO_NO_VECTOR when n is not
 * below VIRTIO_PCI_QUEUE_MAX.
 */
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    if (n < VIRTIO_PCI_QUEUE_MAX) {
        return vdev->vq[n].vector;
    }

    return VIRTIO_NO_VECTOR;
}
662

    
663
/* Assign vector to queue n; ignored when n is not below VIRTIO_PCI_QUEUE_MAX. */
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (!(n < VIRTIO_PCI_QUEUE_MAX)) {
        return;
    }

    vdev->vq[n].vector = vector;
}
668

    
669
/*
 * Claim the first queue slot whose vring.num is still 0, give it the
 * requested size and output handler, and return it.  Aborts when every slot
 * is taken or queue_size exceeds VIRTQUEUE_MAX_SIZE.
 */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    VirtQueue *slot;
    int i = 0;

    while (i < VIRTIO_PCI_QUEUE_MAX && vdev->vq[i].vring.num != 0) {
        i++;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    slot = &vdev->vq[i];
    slot->vring.num = queue_size;
    slot->handle_output = handle_output;

    return slot;
}
687

    
688
/* Set ISR bit 0 and notify the binding with the queue's vector. */
void virtio_irq(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;

    trace_virtio_irq(vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}
694

    
695
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new,
 * should we trigger an event?
 *
 * Returns 1 when event lies in the 16-bit wrapping window [old, new),
 * otherwise 0. */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = (uint16_t)(new - event - 1);
    const uint16_t advanced = (uint16_t)(new - old);

    return past_event < advanced;
}
707

    
708
/*
 * Decide whether the guest should be notified about used buffers on vq.
 *
 * Returns true when VIRTIO_F_NOTIFY_ON_EMPTY was negotiated and the queue
 * has nothing in flight or pending.  Without VIRTIO_RING_F_EVENT_IDX the
 * answer follows VRING_AVAIL_F_NO_INTERRUPT in the avail flags.  With it,
 * signalled_used is updated to the current used index and the used_event
 * value decides, unless the previous signalled_used was not valid.
 */
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t prev_used, cur_used;
    bool was_valid;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if ((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
        !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
        return true;
    }

    if ((vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) == 0) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    was_valid = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    prev_used = vq->signalled_used;
    cur_used = vring_used_idx(vq);
    vq->signalled_used = cur_used;

    return !was_valid ||
           vring_need_event(vring_used_event(vq), cur_used, prev_used);
}
730

    
731
/*
 * Notify the guest about vq if vring_notify() says it is needed: set ISR
 * bit 0 and call the binding with the queue's vector.
 */
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (vring_notify(vdev, vq)) {
        trace_virtio_notify(vdev, vq);
        vdev->isr |= 0x01;
        virtio_notify_vector(vdev, vq->vector);
    }
}
741

    
742
/*
 * Signal a configuration change: set ISR bits 0 and 1 and notify with the
 * config vector.  Skipped unless VIRTIO_CONFIG_S_DRIVER_OK is set in the
 * device status.
 */
void virtio_notify_config(VirtIODevice *vdev)
{
    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
        vdev->isr |= 0x03;
        virtio_notify_vector(vdev, vdev->config_vector);
    }
}
750

    
751
/*
 * Serialize the device to f: binding config (if the binding provides
 * save_config), status, isr, queue_sel, guest features, config space, then
 * the number of configured queues followed by each queue's size, base
 * address, last_avail_idx and binding-specific queue state.
 * Queues are counted up to the first slot whose vring.num is 0.
 */
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int nqueues, q;

    if (vdev->binding->save_config) {
        vdev->binding->save_config(vdev->binding_opaque, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    nqueues = 0;
    while (nqueues < VIRTIO_PCI_QUEUE_MAX &&
           vdev->vq[nqueues].vring.num != 0) {
        nqueues++;
    }

    qemu_put_be32(f, nqueues);

    for (q = 0; q < nqueues; q++) {
        VirtQueue *vq = &vdev->vq[q];

        qemu_put_be32(f, vq->vring.num);
        qemu_put_be64(f, vq->pa);
        qemu_put_be16s(f, &vq->last_avail_idx);
        if (vdev->binding->save_queue) {
            vdev->binding->save_queue(vdev->binding_opaque, q, f);
        }
    }
}
783

    
784
/*
 * Store the guest's feature bits, masked by what the binding's
 * get_features reports.  The device's set_features hook, when present, is
 * called with the masked value.  Returns -1 if val contained bits outside
 * the supported set, otherwise 0; the masked value is stored either way.
 */
int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    const uint32_t supported =
        vdev->binding->get_features(vdev->binding_opaque);
    const uint32_t accepted = val & supported;
    const bool had_unsupported = (val & ~supported) != 0;

    if (vdev->set_features) {
        vdev->set_features(vdev, accepted);
    }
    vdev->guest_features = accepted;

    if (had_unsupported) {
        return -1;
    }
    return 0;
}
797

    
798
/*
 * Restore device state from f, in the layout written by virtio_save().
 *
 * Returns 0 on success.  Returns the binding's error code if load_config or
 * load_queue fails, and -1 when the stream carries unsupported features, an
 * out-of-range queue count, or ring indices inconsistent with guest memory.
 *
 * Lengths and counts in the stream are treated as untrusted: they are
 * checked against the device's own limits before being used to index or
 * fill device memory.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;
    uint32_t stream_config_len, copy_len, skipped;

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = vdev->binding->get_features(vdev->binding_opaque);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }

    /*
     * vdev->config was sized from vdev->config_len at init time, so keep
     * that length and never read more than it into the buffer.  Any excess
     * config bytes in the stream are consumed and dropped so the queue data
     * that follows stays aligned.
     */
    stream_config_len = qemu_get_be32(f);
    copy_len = stream_config_len < vdev->config_len ?
               stream_config_len : vdev->config_len;
    qemu_get_buffer(f, vdev->config, copy_len);
    for (skipped = copy_len; skipped < stream_config_len; skipped++) {
        uint8_t discard;
        qemu_get_8s(f, &discard);
    }

    num = qemu_get_be32(f);

    /* vdev->vq has exactly VIRTIO_PCI_QUEUE_MAX entries. */
    if (num < 0 || num > VIRTIO_PCI_QUEUE_MAX) {
        error_report("Invalid number of PCI queues: %d", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}
862

    
863
/*
 * Tear down a device created by virtio_common_init(): unregister its VM
 * state-change handler, then free the config buffer, the queue array and
 * the device structure itself.
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);

    g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev);
}
870

    
871
/*
 * VM run-state change handler (opaque is the VirtIODevice).
 *
 * Records the new running state, re-applies the current status through
 * virtio_set_status(), and informs the binding.  The status is re-applied
 * before the binding callback when the backend should run (VM running and
 * DRIVER_OK set), and after it otherwise.
 */
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    const bool backend_run =
        running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);

    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}
889

    
890
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
891
                                 size_t config_size, size_t struct_size)
892
{
893
    VirtIODevice *vdev;
894
    int i;
895

    
896
    vdev = g_malloc0(struct_size);
897

    
898
    vdev->device_id = device_id;
899
    vdev->status = 0;
900
    vdev->isr = 0;
901
    vdev->queue_sel = 0;
902
    vdev->config_vector = VIRTIO_NO_VECTOR;
903
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
904
    vdev->vm_running = runstate_is_running();
905
    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
906
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
907
        vdev->vq[i].vdev = vdev;
908
    }
909

    
910
    vdev->name = name;
911
    vdev->config_len = config_size;
912
    if (vdev->config_len)
913
        vdev->config = g_malloc0(config_size);
914
    else
915
        vdev->config = NULL;
916

    
917
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
918

    
919
    return vdev;
920
}
921

    
922
/* Attach a binding and the opaque pointer handed to its callbacks. */
void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        void *opaque)
{
    vdev->binding_opaque = opaque;
    vdev->binding = binding;
}
928

    
929
/* Return the descriptor table address of queue n. */
target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.desc;
}
933

    
934
/* Return the avail ring address of queue n. */
target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.avail;
}
938

    
939
/* Return the used ring address of queue n. */
target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.used;
}
943

    
944
/*
 * Return the start address of queue n's whole ring, which is the address
 * of its descriptor table.
 */
target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.desc;
}
948

    
949
/* Return the size in bytes of queue n's descriptor table. */
target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return sizeof(VRingDesc) * vq->vring.num;
}
953

    
954
/*
 * Return the size in bytes of queue n's avail ring: the VRingAvail header
 * followed by vring.num ring entries.
 *
 * Each entry is a uint16_t (see VRingAvail.ring), so the element size is
 * taken from that type; using a wider type would report a region several
 * times larger than the ring actually occupies.
 */
target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}
959

    
960
/*
 * Return the size in bytes of queue n's used ring: the VRingUsed header
 * followed by vring.num VRingUsedElem entries.
 */
target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vq->vring.num;
}
965

    
966
/*
 * Return the total span of queue n's ring: from the start of the
 * descriptor table to the end of the used ring.
 */
target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->vring.used - vq->vring.desc +
            virtio_queue_get_used_size(vdev, n);
}
971

    
972
/* Return queue n's last_avail_idx. */
uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    return vq->last_avail_idx;
}
976

    
977
/* Overwrite queue n's last_avail_idx with idx. */
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    VirtQueue *vq = &vdev->vq[n];

    vq->last_avail_idx = idx;
}
981

    
982
/* Return a pointer to queue n of vdev. */
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return &vdev->vq[n];
}
986

    
987
/*
 * Handler for a queue's guest notifier: if the notifier was set (it is
 * cleared by the test), raise the queue's interrupt via virtio_irq().
 */
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    virtio_irq(vq);
}
994

    
995
/*
 * Install or remove the handler on the queue's guest notifier.
 *
 * The handler is installed only when assign is true and with_irqfd is
 * false; in every other case it is removed.  On de-assign the notifier is
 * additionally tested once by hand.
 */
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    EventNotifier *notifier = &vq->guest_notifier;
    const bool install = assign && !with_irqfd;

    if (install) {
        event_notifier_set_handler(notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(notifier, NULL);
    }

    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(notifier);
    }
}
1010

    
1011
/* Return the address of the queue's embedded guest notifier. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    EventNotifier *notifier = &vq->guest_notifier;

    return notifier;
}
1015

    
1016
/*
 * Handler for a queue's host notifier: if the notifier was set (it is
 * cleared by the test), process the queue via virtio_queue_notify_vq().
 */
static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    virtio_queue_notify_vq(vq);
}
1023

    
1024
/*
 * Install (assign true) or remove (assign false) the handler on the
 * queue's host notifier.  After removal the notifier is tested once by
 * hand.
 */
void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign)
{
    EventNotifier *notifier = &vq->host_notifier;

    if (!assign) {
        event_notifier_set_handler(notifier, NULL);
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(notifier);
        return;
    }

    event_notifier_set_handler(notifier, virtio_queue_host_notifier_read);
}
1036

    
1037
/* Return the address of the queue's embedded host notifier. */
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    EventNotifier *notifier = &vq->host_notifier;

    return notifier;
}