
hw/virtio.c @ c65adf9b


/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu-error.h"
#include "virtio.h"
#include "qemu-barrier.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;

struct VirtQueue
{
    VRing vring;
    target_phys_addr_t pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether the signalled_used value above is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
86
{
87
    target_phys_addr_t pa = vq->pa;
88

    
89
    vq->vring.desc = pa;
90
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
91
    vq->vring.used = vring_align(vq->vring.avail +
92
                                 offsetof(VRingAvail, ring[vq->vring.num]),
93
                                 VIRTIO_PCI_VRING_ALIGN);
94
}
95

    
96
static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
97
{
98
    target_phys_addr_t pa;
99
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
100
    return ldq_phys(pa);
101
}
102

    
103
static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
104
{
105
    target_phys_addr_t pa;
106
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
107
    return ldl_phys(pa);
108
}
109

    
110
static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
111
{
112
    target_phys_addr_t pa;
113
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
114
    return lduw_phys(pa);
115
}
116

    
117
static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
118
{
119
    target_phys_addr_t pa;
120
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
121
    return lduw_phys(pa);
122
}
123

    
124
static inline uint16_t vring_avail_flags(VirtQueue *vq)
125
{
126
    target_phys_addr_t pa;
127
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
128
    return lduw_phys(pa);
129
}
130

    
131
static inline uint16_t vring_avail_idx(VirtQueue *vq)
132
{
133
    target_phys_addr_t pa;
134
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
135
    return lduw_phys(pa);
136
}
137

    
138
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
139
{
140
    target_phys_addr_t pa;
141
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
142
    return lduw_phys(pa);
143
}
144

    
145
static inline uint16_t vring_used_event(VirtQueue *vq)
146
{
147
    return vring_avail_ring(vq, vq->vring.num);
148
}
149

    
150
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
151
{
152
    target_phys_addr_t pa;
153
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
154
    stl_phys(pa, val);
155
}
156

    
157
static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
158
{
159
    target_phys_addr_t pa;
160
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
161
    stl_phys(pa, val);
162
}
163

    
164
static uint16_t vring_used_idx(VirtQueue *vq)
165
{
166
    target_phys_addr_t pa;
167
    pa = vq->vring.used + offsetof(VRingUsed, idx);
168
    return lduw_phys(pa);
169
}
170

    
171
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
172
{
173
    target_phys_addr_t pa;
174
    pa = vq->vring.used + offsetof(VRingUsed, idx);
175
    stw_phys(pa, val);
176
}
177

    
178
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
179
{
180
    target_phys_addr_t pa;
181
    pa = vq->vring.used + offsetof(VRingUsed, flags);
182
    stw_phys(pa, lduw_phys(pa) | mask);
183
}
184

    
185
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
186
{
187
    target_phys_addr_t pa;
188
    pa = vq->vring.used + offsetof(VRingUsed, flags);
189
    stw_phys(pa, lduw_phys(pa) & ~mask);
190
}
191

    
192
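/* Write the avail event index, which lives right after the used ring, to
 * tell the guest which avail index the device wants to be notified at.
 * Skipped while notifications are disabled for this queue. */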
static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}

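/* Enable or disable guest->host notifications ("kicks") for this queue.
 * With VIRTIO_RING_F_EVENT_IDX negotiated this is done by updating the
 * avail event index instead of toggling VRING_USED_F_NO_NOTIFY. */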
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

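/* Unmap the element's buffers and record it in the used ring at 'idx'
 * slots past the current used index; 'len' is the number of bytes the
 * device wrote into the element's in (device-writable) buffers.  The used
 * index itself is only published by virtqueue_flush(). */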
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

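/* Publish 'count' previously filled elements to the guest by advancing
 * the used index; the write barrier orders the used-ring entries before
 * the index update. */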
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

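/* Number of request heads the guest has made available past 'idx'; a
 * value larger than the ring size means the avail index is corrupt. */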
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

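/* Walk the available descriptor chains (including indirect tables) and
 * return 1 as soon as at least 'in_bytes' of device-writable or
 * 'out_bytes' of device-readable buffer space is found, 0 otherwise. */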
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        target_phys_addr_t desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            num_bufs = i = 0;
            desc_pa = vring_desc_addr(desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }

    return 0;
}

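/* Map each guest-physical address in 'addr' into host memory and store
 * the pointer in the matching iovec; a partial or failed mapping (e.g.
 * MMIO memory) is fatal. */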
void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
    size_t num_sg, int is_write)
{
    unsigned int i;
    target_phys_addr_t len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}

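/* Fetch the next available request: follow its descriptor chain (direct
 * or indirect), build the in/out scatter-gather lists, map them into host
 * memory and return the total number of descriptors, or 0 if the queue is
 * empty.
 *
 * Typical device-side usage (illustrative sketch, not code from this
 * file):
 *
 *     VirtQueueElement elem;
 *     while (virtqueue_pop(vq, &elem)) {
 *         ... process elem.out_sg, write replies into elem.in_sg ...
 *         virtqueue_push(vq, &elem, len);
 *         virtio_notify(vdev, vq);
 *     }
 */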
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    target_phys_addr_t desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (vdev->binding->notify) {
        vdev->binding->notify(vdev->binding_opaque, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    trace_virtio_set_status(vdev, val);

    if (vdev->set_status) {
        vdev->set_status(vdev, val);
    }
    vdev->status = val;
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    virtio_set_status(vdev, 0);

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

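/* Reserve the first unused queue slot, set its size and output handler,
 * and return it.  Aborts if all slots are taken or queue_size exceeds
 * VIRTQUEUE_MAX_SIZE.  A device would typically call something like
 * virtio_add_queue(vdev, 128, handle_output) at init time (the size and
 * handler name here are illustrative). */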
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
        /* Note: Xen has similar logic for notification hold-off
         * in include/xen/interface/io/ring.h with req_event and req_prod
         * corresponding to event_idx + 1 and new respectively.
         * Note also that req_event and req_prod in Xen start at 1,
         * event indexes in virtio start at 0. */
        return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}

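/* Decide whether the guest should be interrupted for this queue, honouring
 * VIRTIO_F_NOTIFY_ON_EMPTY, VRING_AVAIL_F_NO_INTERRUPT and, when
 * VIRTIO_RING_F_EVENT_IDX is negotiated, the guest's used event index. */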
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* Always notify when queue is empty (if the guest acknowledged the
     * VIRTIO_F_NOTIFY_ON_EMPTY feature) */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

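/* Serialize the common virtio state (status, ISR, queue selector,
 * negotiated features, config space and per-queue indexes) into the
 * migration stream; binding- and queue-specific state goes through the
 * binding callbacks. */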
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    if (vdev->binding->save_config)
        vdev->binding->save_config(vdev->binding_opaque, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (vdev->binding->save_queue)
            vdev->binding->save_queue(vdev->binding_opaque, i, f);
    }
}

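/* Record the guest's feature selection, masked to what the binding
 * supports.  Returns -1 if the guest asked for unsupported bits, 0
 * otherwise. */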
int virtio_set_features(VirtIODevice *vdev, uint32_t val)
{
    uint32_t supported_features =
        vdev->binding->get_features(vdev->binding_opaque);
    bool bad = (val & ~supported_features) != 0;

    val &= supported_features;
    if (vdev->set_features) {
        vdev->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

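/* Counterpart of virtio_save(): restore the common virtio state from the
 * migration stream and sanity-check each queue's avail index against its
 * ring before resuming. */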
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features;

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);

    if (virtio_set_features(vdev, features) < 0) {
        supported_features = vdev->binding->get_features(vdev->binding_opaque);
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    if (vdev->config)
        g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

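/* Allocate and initialize the common part of a virtio device: the
 * VirtQueue array, the config space buffer and the VM state change
 * handler.  'struct_size' lets a device embed VirtIODevice at the start
 * of its own larger structure. */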
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    int i;

    vdev = g_malloc0(struct_size);

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = g_malloc0(config_size);
    else
        vdev->config = NULL;

    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);

    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        void *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}

target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint64_t) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
            virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}