/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "virtio.h"
#include "sysemu.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;

struct VirtQueue
{
    VRing vring;
    target_phys_addr_t pa;
    uint16_t last_avail_idx;
    int inuse;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};

#define VIRTIO_PCI_QUEUE_MAX        16

/* virt queue functions */
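/* Lay out the three parts of the vring in guest memory starting at vq->pa:
 * the descriptor table first, the available ring immediately after it, and
 * the used ring rounded up to the next VIRTIO_PCI_VRING_ALIGN (4096 byte)
 * boundary. */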
static void virtqueue_init(VirtQueue *vq)
{
    target_phys_addr_t pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}
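
/* Helpers that read and write individual vring fields directly in guest
 * physical memory via the ld*_phys/st*_phys accessors; no host-side shadow
 * copy of the rings is kept. */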
static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, vring_used_idx(vq) + val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable)
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    else
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}
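
/* virtqueue_fill() unmaps an element's buffers and records it in the used
 * ring at offset 'idx' past the current used index without publishing it;
 * virtqueue_flush() then makes 'count' filled entries visible to the guest
 * by advancing used->idx, and virtqueue_push() combines the two for the
 * common single-element case. */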
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    /* Make sure buffer is written before we update index. */
    wmb();
    vring_used_idx_increment(vq, count);
    vq->inuse -= count;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        fprintf(stderr, "Guest moved used index from %u to %u",
                idx, vring_avail_idx(vq));
        exit(1);
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        fprintf(stderr, "Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(vq, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num) {
        fprintf(stderr, "Desc next is %u", next);
        exit(1);
    }

    return next;
}
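
/* Check how much buffer space the queue currently offers without consuming
 * anything: returns 1 once the available descriptors provide 'in_bytes' of
 * guest-writable space or 'out_bytes' of guest-readable space, 0 otherwise. */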
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int num_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    num_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        int i;

        i = virtqueue_get_head(vq, idx++);
        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > vq->vring.num) {
                fprintf(stderr, "Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(vq, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(vq, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
    }

    return 0;
}
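
/* Pop the next available element: walk its descriptor chain, map each
 * buffer into host memory with cpu_physical_memory_map() and fill in the
 * element's in_sg/out_sg lists.  Returns the total number of buffers, or 0
 * if the queue is empty. */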
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;
    target_phys_addr_t len;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    do {
        struct iovec *sg;
        int is_write = 0;

        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
            is_write = 1;
        } else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the first descriptor, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);
        len = sg->iov_len;

        sg->iov_base = cpu_physical_memory_map(vring_desc_addr(vq, i), &len, is_write);

        if (sg->iov_base == NULL || len != sg->iov_len) {
            fprintf(stderr, "virtio: trying to map MMIO memory\n");
            exit(1);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num) {
            fprintf(stderr, "Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    vq->inuse++;

    return elem->in_num + elem->out_num;
}

/* virtio device */

void virtio_update_irq(VirtIODevice *vdev)
{
    if (vdev->binding->update_irq) {
        vdev->binding->update_irq(vdev->binding_opaque);
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    virtio_update_irq(vdev);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
    }
}
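
/* Config space accessors.  Reads refresh the device's config buffer through
 * get_config() before copying out the requested field; writes update the
 * buffer and notify the device through set_config() if it provides one.
 * Out-of-range reads return all-ones and out-of-range writes are ignored. */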
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}
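
/* Writing a queue address of 0 resets the whole device; any other value
 * records the queue's base address and recomputes the vring layout. */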
void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
    if (addr == 0) {
        virtio_reset(vdev);
    } else {
        vdev->vq[n].pa = addr;
        virtqueue_init(&vdev->vq[n]);
    }
}

target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
        vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
    }
}
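
/* Allocate the first unused queue slot for a device backend.  Aborts if all
 * VIRTIO_PCI_QUEUE_MAX slots are taken or the requested size exceeds
 * VIRTQUEUE_MAX_SIZE. */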
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}
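
/* Raise a buffer-used interrupt unless the guest asked for it to be
 * suppressed.  With VIRTIO_F_NOTIFY_ON_EMPTY acknowledged, the suppression
 * is overridden when the queue has just been emptied. */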
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Always notify when queue is empty (when feature acknowledge) */
    if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
        (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
        return;

    vdev->isr |= 0x01;
    virtio_update_irq(vdev);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_update_irq(vdev);
}
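
/* Save/restore the common virtio state: status, ISR, queue selector,
 * acknowledged features and config space, followed by the number of active
 * queues and, for each, its size, ring address and last_avail_idx. */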
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    /* FIXME: load/save binding.  */
    //pci_device_save(&vdev->pci_dev, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}

void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    /* FIXME: load/save binding.  */
    //pci_device_load(&vdev->pci_dev, f);

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pa) {
            virtqueue_init(&vdev->vq[i]);
        }
    }

    virtio_update_irq(vdev);
}

void virtio_cleanup(VirtIODevice *vdev)
{
    if (vdev->config)
        qemu_free(vdev->config);
    qemu_free(vdev->vq);
}
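
/* Allocate and initialise the common part of a virtio device.  struct_size
 * lets the caller embed VirtIODevice at the start of a larger device
 * structure; the queue array and config buffer are allocated here and the
 * device is registered for system reset. */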
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;

    vdev = qemu_mallocz(struct_size);

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    qemu_register_reset(virtio_reset, 0, vdev);

    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        void *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}