/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "virtio.h"
#include "sysemu.h"

//#define VIRTIO_ZERO_COPY

/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0

/* How many bits to shift the physical queue address written to QUEUE_PFN.
 * 12 is historical, and due to the x86 page size. */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT    12

/* The alignment to use between the consumer and producer parts of the vring.
 * x86 page size again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

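/* VRingDesc, VRingAvail, VRingUsedElem and VRingUsed mirror the vring
 * layout in guest physical memory; they are only used for sizeof/offsetof
 * arithmetic when computing guest addresses.  VRing and VirtQueue below
 * are QEMU-side bookkeeping. */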
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;

struct VirtQueue
{
    VRing vring;
    uint32_t pfn;
    uint16_t last_avail_idx;
    int inuse;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};

#define VIRTIO_PCI_QUEUE_MAX        16

/* virt queue functions */
#ifdef VIRTIO_ZERO_COPY
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            fprintf(stderr, "discontiguous virtio memory\n");
            exit(1);
        }
    }

    return phys_ram_base + off;
}
#endif

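/* Lay out the three parts of the vring starting at guest physical address
 * pa: the descriptor table first, then the available ring, then the used
 * ring aligned up to VIRTIO_PCI_VRING_ALIGN. */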
static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa)
{
    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}

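/* The accessors below read and write vring fields directly in guest
 * memory, computing the guest physical address of each field with
 * sizeof/offsetof and going through the ld*_phys/st*_phys helpers. */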
static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, vring_used_idx(vq) + val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable)
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    else
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

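/* Copy a completed element back to the guest: write out the in_sg data
 * (and free the temporary buffers when not doing zero copy), then record
 * the element in the used ring.  The used index itself is only published
 * by virtqueue_flush(). */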
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

#ifndef VIRTIO_ZERO_COPY
    for (i = 0; i < elem->out_num; i++)
        qemu_free(elem->out_sg[i].iov_base);
#endif

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

#ifdef VIRTIO_ZERO_COPY
        if (size) {
            ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base;
            ram_addr_t off;

            for (off = 0; off < size; off += TARGET_PAGE_SIZE)
                cpu_physical_memory_set_dirty(addr + off);
        }
#else
        if (size)
            cpu_physical_memory_write(elem->in_addr[i],
                                      elem->in_sg[i].iov_base,
                                      size);

        qemu_free(elem->in_sg[i].iov_base);
#endif

        offset += size;
    }

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Fill in the next entry of the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    /* Make sure buffer is written before we update index. */
    wmb();
    vring_used_idx_increment(vq, count);
    vq->inuse -= count;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        fprintf(stderr, "Guest moved used index from %u to %u\n",
                idx, vring_avail_idx(vq));
        exit(1);
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        fprintf(stderr, "Guest says index %u is available\n", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptors. */
    next = vring_desc_next(vq, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num) {
        fprintf(stderr, "Desc next is %u\n", next);
        exit(1);
    }

    return next;
}

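/* Check whether the available descriptor chains can supply at least
 * in_bytes of device-writable space and/or out_bytes of device-readable
 * space, without consuming anything from the queue. */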
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int num_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    num_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        int i;

        i = virtqueue_get_head(vq, idx++);
        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > vq->vring.num) {
                fprintf(stderr, "Looped descriptor\n");
                exit(1);
            }

            if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(vq, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(vq, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
    }

    return 0;
}

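/* Consume the next available descriptor chain, splitting it into the
 * out_sg (guest-to-device) and in_sg (device-to-guest) vectors of elem.
 * Without zero copy, buffers are bounce-copied through qemu_malloc()ed
 * memory: out buffers are read from guest memory here and in buffers are
 * written back by virtqueue_fill(). */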
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    do {
        struct iovec *sg;

        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
        } else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the first descriptor, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);

#ifdef VIRTIO_ZERO_COPY
        sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
#else
        /* Cap the individual scatter element size to prevent unbounded
           allocations of memory from the guest.  Practically speaking, no
           virtio driver will ever pass more than a page in each element.  We
           set the cap to 2MB in case for some reason a large page makes its
           way into the sg list.  When we implement a zero copy API, this
           limitation will disappear */
        if (sg->iov_len > (2 << 20))
            sg->iov_len = 2 << 20;

        sg->iov_base = qemu_malloc(sg->iov_len);
        if (!(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) {
            cpu_physical_memory_read(vring_desc_addr(vq, i),
                                     sg->iov_base,
                                     sg->iov_len);
        }
#endif
        if (sg->iov_base == NULL) {
            fprintf(stderr, "Invalid mapping\n");
            exit(1);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num) {
            fprintf(stderr, "Looped descriptor\n");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    vq->inuse++;

    return elem->in_num + elem->out_num;
}

/* virtio device */

static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}

static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}

static void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    virtio_update_irq(vdev);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}

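/* Handle guest writes to the virtio header registers in the PCI I/O BAR:
 * feature negotiation, queue placement via QUEUE_PFN (a PFN of 0 resets
 * the device), queue selection, queue notification, and device status. */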
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else {
            virtqueue_init(&vdev->vq[vdev->queue_sel], pa);
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}

static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}

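/* The device-specific config space lives in the same I/O BAR immediately
 * after the virtio header, at offset VIRTIO_PCI_CONFIG.  The handlers
 * below bounds-check each access and copy through the vdev->config
 * buffer, refreshing it via get_config() on reads and pushing writes back
 * through set_config(). */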
static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

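/* Map the PCI I/O region: register 1-, 2- and 4-byte handlers for the
 * 20-byte virtio header, followed by handlers for the device-specific
 * config space when present. */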
static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->get_config(vdev, vdev->config);
    }
}

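/* Allocate the first unused queue slot for the device; aborts when all
 * VIRTIO_PCI_QUEUE_MAX slots are taken or the requested size exceeds
 * VIRTQUEUE_MAX_SIZE. */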
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Always notify when the queue is empty (if the guest acknowledged the
     * VIRTIO_F_NOTIFY_ON_EMPTY feature); otherwise honour the guest's
     * VRING_AVAIL_F_NO_INTERRUPT suppression flag. */
    if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
        (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
        return;

    vdev->isr |= 0x01;
    virtio_update_irq(vdev);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_update_irq(vdev);
}

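/* Save/load the common virtio state: the PCI device, the virtio header
 * registers, the device config blob, and for each configured queue its
 * size, PFN and last_avail_idx.  virtio_load() recomputes the vring
 * layout from the saved PFN. */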
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    pci_device_save(&vdev->pci_dev, f);

    qemu_put_be32s(f, &vdev->addr);
    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be32s(f, &vdev->vq[i].pfn);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}

void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    pci_device_load(&vdev->pci_dev, f);

    qemu_get_be32s(f, &vdev->addr);
    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pfn) {
            target_phys_addr_t pa;

            pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
            virtqueue_init(&vdev->vq[i], pa);
        }
    }

    virtio_update_irq(vdev);
}

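/* Common PCI plumbing for virtio devices.  The config-space offsets
 * written below are the standard PCI fields: 0x08 revision ID (used to
 * carry the virtio ABI version), 0x09 programming interface, 0x0e header
 * type, 0x2c-0x2f subsystem vendor/device ID, and 0x3d interrupt pin. */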
VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint16_t class_code, uint8_t pif,
                              size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;
    uint32_t size;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    if (!pci_dev)
        return NULL;

    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);

    config = pci_dev->config;
    pci_config_set_vendor_id(config, vendor);
    pci_config_set_device_id(config, device);

    config[0x08] = VIRTIO_PCI_ABI_VERSION;

    config[0x09] = pif;
    pci_config_set_class(config, class_code);
    config[0x0e] = 0x00;

    config[0x2c] = subvendor & 0xFF;
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    size = 20 + config_size;
    if (size & (size - 1))
        size = 1 << qemu_fls(size);

    pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}