Revision 6cbf4c8c

b/Makefile.target
190 190
obj-y += rtl8139.o
191 191
obj-y += e1000.o
192 192

  
193
# Inter-VM PCI shared memory
194
obj-y += ivshmem.o
195

  
193 196
# Hardware support
194 197
obj-i386-y += vga.o
195 198
obj-i386-y += mc146818rtc.o i8259.o pc.o
b/hw/ivshmem.c
1
/*
2
 * Inter-VM Shared Memory PCI device.
3
 *
4
 * Author:
5
 *      Cam Macdonell <cam@cs.ualberta.ca>
6
 *
7
 * Based On: cirrus_vga.c
8
 *          Copyright (c) 2004 Fabrice Bellard
9
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
10
 *
11
 *      and rtl8139.c
12
 *          Copyright (c) 2006 Igor Kovalenko
13
 *
14
 * This code is licensed under the GNU GPL v2.
15
 */
16
#include "hw.h"
#include "pc.h"
#include "pci.h"
#include "msix.h"
#include "kvm.h"

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
24

  
25
#define IVSHMEM_IOEVENTFD   0
26
#define IVSHMEM_MSI     1
27

  
28
#define IVSHMEM_PEER    0
29
#define IVSHMEM_MASTER  1
30

  
31
#define IVSHMEM_REG_BAR_SIZE 0x100
32

  
33
//#define DEBUG_IVSHMEM
34
#ifdef DEBUG_IVSHMEM
35
#define IVSHMEM_DPRINTF(fmt, ...)        \
36
    do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0)
37
#else
38
#define IVSHMEM_DPRINTF(fmt, ...)
39
#endif
40

  
41
typedef struct Peer {
42
    int nb_eventfds;
43
    int *eventfds;
44
} Peer;
45

  
46
typedef struct EventfdEntry {
47
    PCIDevice *pdev;
48
    int vector;
49
} EventfdEntry;
50

  
51
typedef struct IVShmemState {
52
    PCIDevice dev;
53
    uint32_t intrmask;
54
    uint32_t intrstatus;
55
    uint32_t doorbell;
56

  
57
    CharDriverState **eventfd_chr;
58
    CharDriverState *server_chr;
59
    int ivshmem_mmio_io_addr;
60

  
61
    pcibus_t mmio_addr;
62
    pcibus_t shm_pci_addr;
63
    uint64_t ivshmem_offset;
64
    uint64_t ivshmem_size; /* size of shared memory region */
65
    int shm_fd; /* shared memory file descriptor */
66

  
67
    Peer *peers;
68
    int nb_peers; /* how many guests we have space for */
69
    int max_peer; /* maximum numbered peer */
70

  
71
    int vm_id;
72
    uint32_t vectors;
73
    uint32_t features;
74
    EventfdEntry *eventfd_table;
75

  
76
    char * shmobj;
77
    char * sizearg;
78
    char * role;
79
    int role_val;   /* scalar to avoid multiple string comparisons */
80
} IVShmemState;
81

  
82
/* registers for the Inter-VM shared memory device */
83
enum ivshmem_registers {
84
    INTRMASK = 0,
85
    INTRSTATUS = 4,
86
    IVPOSITION = 8,
87
    DOORBELL = 12,
88
};
89

  
90
/* Test whether feature bit @feature is enabled in the device's
 * feature mask (set from the qdev "ioeventfd"/"msi" properties). */
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature) {
    return ivs->features & (1u << feature);
}
94

  
95
/* True iff @x is a power of two.
 * Note: zero is NOT a power of two; the plain (x & (x - 1)) == 0 test
 * wrongly accepted it, which would let a zero-sized BAR slip through. */
static inline bool is_power_of_two(uint64_t x) {
    return x != 0 && (x & (x - 1)) == 0;
}
98

  
99
/* BAR2 (shared memory) map callback: remember where the guest placed the
 * BAR and, if the backing RAM has already been allocated, register it. */
static void ivshmem_map(PCIDevice *pci_dev, int region_num,
                        pcibus_t addr, pcibus_t size, int type)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev);

    s->shm_pci_addr = addr;

    /* ivshmem_offset stays 0 until the shared memory has been mmap'd;
     * in server mode the mapping may arrive after the BAR is placed. */
    if (s->ivshmem_offset > 0) {
        cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size,
                                     s->ivshmem_offset);
    }

    IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %"
                    PRIu64 ", size = %" FMT_PCIBUS "\n",
                    addr, s->ivshmem_offset, size);
}
115

  
116
/* accessing registers - based on rtl8139 */
117
/* Recompute the legacy interrupt line from status & mask
 * (accessing registers - based on rtl8139). */
static void ivshmem_update_irq(IVShmemState *s, int val)
{
    int pending = s->intrstatus & s->intrmask;

    /* don't print ISR resets */
    if (pending) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        pending ? 1 : 0, s->intrstatus, s->intrmask);
    }

    qemu_set_irq(s->dev.irq[0], pending != 0);
}
130

  
131
/* Write the interrupt mask register and re-evaluate the IRQ line. */
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
    s->intrmask = val;
    ivshmem_update_irq(s, val);
}
139

  
140
/* Read back the interrupt mask register. */
static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", s->intrmask);
    return s->intrmask;
}
148

  
149
/* Write the interrupt status register and re-evaluate the IRQ line. */
static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
    s->intrstatus = val;
    ivshmem_update_irq(s, val);
}
158

  
159
/* Read the interrupt status register; ISR is read-to-clear, so all
 * pending interrupts are acknowledged and the IRQ line is deasserted. */
static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t isr = s->intrstatus;

    s->intrstatus = 0;
    ivshmem_update_irq(s, 0);

    return isr;
}
170

  
171
/* 16-bit register writes are not supported; ignore them. */
static void ivshmem_io_writew(void *opaque, target_phys_addr_t addr,
                              uint32_t val)
{
    IVSHMEM_DPRINTF("We shouldn't be writing words\n");
}
177

  
178
/* 32-bit MMIO register write.  INTRMASK/INTRSTATUS update interrupt
 * state; a DOORBELL write rings a peer: the destination VM ID is in the
 * high 16 bits of @val and the vector number in the low byte. */
static void ivshmem_io_writel(void *opaque, target_phys_addr_t addr,
                                                            uint32_t val)
{
    IVShmemState *s = opaque;

    uint64_t write_one = 1;
    uint16_t dest = val >> 16;      /* destination VM ID */
    uint16_t vector = val & 0xff;   /* interrupt vector */

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable; dest is unsigned, so
             * only the upper bound needs testing (the original `dest < 0`
             * comparison was always false) */
            if (dest > s->max_peer) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range; vector is unsigned, so `vector >= 0`
             * always held and is dropped */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
                                                    write_one, dest, vector);
                /* kick the peer by writing 1 to its eventfd */
                if (write(s->peers[dest].eventfds[vector],
                          &write_one, sizeof(write_one)) != sizeof(write_one)) {
                    IVSHMEM_DPRINTF("error writing to eventfd\n");
                }
            }
            break;
        default:
            IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
    }
}
221

  
222
/* 8-bit register writes are not supported; ignore them. */
static void ivshmem_io_writeb(void *opaque, target_phys_addr_t addr,
                              uint32_t val)
{
    IVSHMEM_DPRINTF("We shouldn't be writing bytes\n");
}
227

  
228
/* 16-bit register reads are not supported; return 0. */
static uint32_t ivshmem_io_readw(void *opaque, target_phys_addr_t addr)
{
    IVSHMEM_DPRINTF("We shouldn't be reading words\n");
    return 0;
}
234

  
235
/* 32-bit MMIO register read. */
static uint32_t ivshmem_io_readl(void *opaque, target_phys_addr_t addr)
{
    IVShmemState *s = opaque;

    switch (addr)
    {
        case INTRMASK:
            return ivshmem_IntrMask_read(s);

        case INTRSTATUS:
            return ivshmem_IntrStatus_read(s);

        case IVPOSITION:
            /* IVPosition reads -1 until the shared memory is attached,
             * then our VM ID; guests poll this to know memory is ready */
            return (s->shm_fd > 0) ? s->vm_id : -1;

        default:
            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
            return 0;
    }
}
267

  
268
/* 8-bit register reads are not supported; return 0. */
static uint32_t ivshmem_io_readb(void *opaque, target_phys_addr_t addr)
{
    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
    return 0;
}
274

  
275
static CPUReadMemoryFunc * const ivshmem_mmio_read[3] = {
276
    ivshmem_io_readb,
277
    ivshmem_io_readw,
278
    ivshmem_io_readl,
279
};
280

  
281
static CPUWriteMemoryFunc * const ivshmem_mmio_write[3] = {
282
    ivshmem_io_writeb,
283
    ivshmem_io_writew,
284
    ivshmem_io_writel,
285
};
286

  
287
static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
288
{
289
    IVShmemState *s = opaque;
290

  
291
    ivshmem_IntrStatus_write(s, *buf);
292

  
293
    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
294
}
295

  
296
/* Chardev poll callback: we can always absorb an 8-byte eventfd datum. */
static int ivshmem_can_receive(void * opaque)
{
    return 8;
}
300

  
301
/* Chardev event callback: nothing to do beyond debug logging. */
static void ivshmem_event(void *opaque, int event)
{
    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
}
305

  
306
static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
307

  
308
    EventfdEntry *entry = opaque;
309
    PCIDevice *pdev = entry->pdev;
310

  
311
    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
312
    msix_notify(pdev, entry->vector);
313
}
314

  
315
static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd,
316
                                                                    int vector)
317
{
318
    /* create a event character device based on the passed eventfd */
319
    IVShmemState *s = opaque;
320
    CharDriverState * chr;
321

  
322
    chr = qemu_chr_open_eventfd(eventfd);
323

  
324
    if (chr == NULL) {
325
        fprintf(stderr, "creating eventfd for eventfd %d failed\n", eventfd);
326
        exit(-1);
327
    }
328

  
329
    /* if MSI is supported we need multiple interrupts */
330
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
331
        s->eventfd_table[vector].pdev = &s->dev;
332
        s->eventfd_table[vector].vector = vector;
333

  
334
        qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
335
                      ivshmem_event, &s->eventfd_table[vector]);
336
    } else {
337
        qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
338
                      ivshmem_event, s);
339
    }
340

  
341
    return chr;
342

  
343
}
344

  
345
/* Verify the guest will not map more memory than the shared object has
 * allocated.  Returns 0 on success, -1 on error (stat failure or
 * object too small). */
static int check_shm_size(IVShmemState *s, int fd) {

    struct stat buf;

    /* the original ignored fstat failure and compared garbage */
    if (fstat(fd, &buf) < 0) {
        fprintf(stderr, "ivshmem: could not stat shared object: %s\n",
                                                        strerror(errno));
        return -1;
    }

    if (s->ivshmem_size > (uint64_t) buf.st_size) {
        fprintf(stderr, "IVSHMEM ERROR: Requested memory size greater");
        /* cast st_size: "%ld" is wrong for off_t on 32-bit hosts */
        fprintf(stderr, " than shared object size (%" PRIu64 " > %" PRIu64
                ")\n", s->ivshmem_size, (uint64_t) buf.st_size);
        return -1;
    }

    return 0;
}
362

  
363
/* create the shared memory BAR when we are not using the server, so we can
364
 * create the BAR and map the memory immediately */
365
/* Non-server mode: map the shared object and register BAR2 immediately.
 * Exits on mmap failure (the original passed MAP_FAILED on to
 * qemu_ram_alloc_from_ptr unchecked). */
static void create_shared_memory_BAR(IVShmemState *s, int fd) {

    void * ptr;

    ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        fprintf(stderr, "ivshmem: could not mmap shared file: %s\n",
                                                        strerror(errno));
        exit(-1);
    }

    s->shm_fd = fd;

    s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev, "ivshmem.bar2",
                                                        s->ivshmem_size, ptr);

    /* region for shared memory */
    pci_register_bar(&s->dev, 2, s->ivshmem_size,
                                PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map);
}
380

  
381
/* Tear down all eventfds for departed peer @posn: deassign any KVM
 * ioeventfds, close the descriptors and release the array. */
static void close_guest_eventfds(IVShmemState *s, int posn)
{
    int vec;
    int nb_fds = s->peers[posn].nb_eventfds;

    for (vec = 0; vec < nb_fds; vec++) {
        kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[vec],
                                    s->mmio_addr + DOORBELL,
                                    (posn << 16) | vec, 0);
        close(s->peers[posn].eventfds[vec]);
    }

    qemu_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}
396

  
397
/* Register every known peer eventfd as a KVM ioeventfd on the doorbell,
 * so guest doorbell writes are handled without a userspace exit. */
static void setup_ioeventfds(IVShmemState *s) {

    int peer, vec;

    for (peer = 0; peer <= s->max_peer; peer++) {
        for (vec = 0; vec < s->peers[peer].nb_eventfds; vec++) {
            kvm_set_ioeventfd_mmio_long(s->peers[peer].eventfds[vec],
                                        s->mmio_addr + DOORBELL,
                                        (peer << 16) | vec, 1);
        }
    }
}
408

  
409
/* this function increase the dynamic storage need to store data about other
410
 * guests */
411
static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
412

  
413
    int j, old_nb_alloc;
414

  
415
    old_nb_alloc = s->nb_peers;
416

  
417
    while (new_min_size >= s->nb_peers)
418
        s->nb_peers = s->nb_peers * 2;
419

  
420
    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
421
    s->peers = qemu_realloc(s->peers, s->nb_peers * sizeof(Peer));
422

  
423
    /* zero out new pointers */
424
    for (j = old_nb_alloc; j < s->nb_peers; j++) {
425
        s->peers[j].eventfds = NULL;
426
        s->peers[j].nb_eventfds = 0;
427
    }
428
}
429

  
430
static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
431
{
432
    IVShmemState *s = opaque;
433
    int incoming_fd, tmp_fd;
434
    int guest_max_eventfd;
435
    long incoming_posn;
436

  
437
    memcpy(&incoming_posn, buf, sizeof(long));
438
    /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
439
    tmp_fd = qemu_chr_get_msgfd(s->server_chr);
440
    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
441

  
442
    /* make sure we have enough space for this guest */
443
    if (incoming_posn >= s->nb_peers) {
444
        increase_dynamic_storage(s, incoming_posn);
445
    }
446

  
447
    if (tmp_fd == -1) {
448
        /* if posn is positive and unseen before then this is our posn*/
449
        if ((incoming_posn >= 0) &&
450
                            (s->peers[incoming_posn].eventfds == NULL)) {
451
            /* receive our posn */
452
            s->vm_id = incoming_posn;
453
            return;
454
        } else {
455
            /* otherwise an fd == -1 means an existing guest has gone away */
456
            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
457
            close_guest_eventfds(s, incoming_posn);
458
            return;
459
        }
460
    }
461

  
462
    /* because of the implementation of get_msgfd, we need a dup */
463
    incoming_fd = dup(tmp_fd);
464

  
465
    if (incoming_fd == -1) {
466
        fprintf(stderr, "could not allocate file descriptor %s\n",
467
                                                            strerror(errno));
468
        return;
469
    }
470

  
471
    /* if the position is -1, then it's shared memory region fd */
472
    if (incoming_posn == -1) {
473

  
474
        void * map_ptr;
475

  
476
        s->max_peer = 0;
477

  
478
        if (check_shm_size(s, incoming_fd) == -1) {
479
            exit(-1);
480
        }
481

  
482
        /* mmap the region and map into the BAR2 */
483
        map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
484
                                                            incoming_fd, 0);
485
        s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev,
486
                                    "ivshmem.bar2", s->ivshmem_size, map_ptr);
487

  
488
        IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %"
489
                         PRIu64 ", size = %" PRIu64 "\n", s->shm_pci_addr,
490
                         s->ivshmem_offset, s->ivshmem_size);
491

  
492
        if (s->shm_pci_addr > 0) {
493
            /* map memory into BAR2 */
494
            cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size,
495
                                                            s->ivshmem_offset);
496
        }
497

  
498
        /* only store the fd if it is successfully mapped */
499
        s->shm_fd = incoming_fd;
500

  
501
        return;
502
    }
503

  
504
    /* each guest has an array of eventfds, and we keep track of how many
505
     * guests for each VM */
506
    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;
507

  
508
    if (guest_max_eventfd == 0) {
509
        /* one eventfd per MSI vector */
510
        s->peers[incoming_posn].eventfds = (int *) qemu_malloc(s->vectors *
511
                                                                sizeof(int));
512
    }
513

  
514
    /* this is an eventfd for a particular guest VM */
515
    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
516
                                            guest_max_eventfd, incoming_fd);
517
    s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd;
518

  
519
    /* increment count for particular guest */
520
    s->peers[incoming_posn].nb_eventfds++;
521

  
522
    /* keep track of the maximum VM ID */
523
    if (incoming_posn > s->max_peer) {
524
        s->max_peer = incoming_posn;
525
    }
526

  
527
    if (incoming_posn == s->vm_id) {
528
        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
529
                   s->peers[s->vm_id].eventfds[guest_max_eventfd],
530
                   guest_max_eventfd);
531
    }
532

  
533
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
534
        if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL,
535
                        (incoming_posn << 16) | guest_max_eventfd, 1) < 0) {
536
            fprintf(stderr, "ivshmem: ioeventfd not available\n");
537
        }
538
    }
539

  
540
    return;
541
}
542

  
543
/* qdev reset: clear any pending interrupts. */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d);

    s->intrstatus = 0;
}
550

  
551
/* BAR0 (register) map callback: place the MMIO region and, now that the
 * doorbell address is known, register any pending KVM ioeventfds. */
static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
                             pcibus_t addr, pcibus_t size, int type)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev);

    s->mmio_addr = addr;
    cpu_register_physical_memory(addr, IVSHMEM_REG_BAR_SIZE,
                                 s->ivshmem_mmio_io_addr);

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        setup_ioeventfds(s);
    }
}
564

  
565
/* Parse the "size" property (decimal number with optional M/G suffix,
 * default megabytes) into bytes.  Exits on malformed input, overflow, or
 * a size that is zero or not a power of two (BAR requirement). */
static uint64_t ivshmem_get_size(IVShmemState * s) {

    uint64_t value;
    char *ptr;

    /* strtoull errors were previously unchecked: "" or "Muddle" parsed
     * as 0, and out-of-range values silently saturated */
    errno = 0;
    value = strtoull(s->sizearg, &ptr, 10);
    if (ptr == s->sizearg || errno == ERANGE) {
        fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
        exit(1);
    }

    switch (*ptr) {
        case 0: case 'M': case 'm':
            value <<= 20;
            break;
        case 'G': case 'g':
            value <<= 30;
            break;
        default:
            fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
            exit(1);
    }

    /* BARs must be a non-zero power of 2 */
    if (value == 0 || !is_power_of_two(value)) {
        fprintf(stderr, "ivshmem: size must be power of 2\n");
        exit(1);
    }

    return value;
}
591

  
592
/* Initialize MSI-X: allocate the vectors, expose the table in BAR 1,
 * mark every vector in use, and allocate the per-vector chardev table. */
static void ivshmem_setup_msi(IVShmemState * s) {

    int i;

    if (msix_init(&s->dev, s->vectors, 1, 0)) {
        IVSHMEM_DPRINTF("msix initialization failed\n");
        exit(1);
    }

    pci_register_bar(&s->dev, 1,
                     msix_bar_size(&s->dev),
                     PCI_BASE_ADDRESS_SPACE_MEMORY,
                     msix_mmio_map);
    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* 'activate' the vectors */
    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(&s->dev, i);
    }

    /* allocate Qemu char devices for receiving interrupts */
    s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
}
617

  
618
/* savevm handler: PCI state plus interrupt state (MSI-X table or the
 * legacy status/mask registers, depending on the configured mode). */
static void ivshmem_save(QEMUFile* f, void *opaque)
{
    IVShmemState *s = opaque;

    IVSHMEM_DPRINTF("ivshmem_save\n");
    pci_device_save(&s->dev, f);

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_save(&s->dev, f);
    } else {
        qemu_put_be32(f, s->intrstatus);
        qemu_put_be32(f, s->intrmask);
    }
}
633

  
634
/* loadvm handler: mirror of ivshmem_save.  Only version 0 is known, and
 * 'peer' role devices refuse migration entirely. */
static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
{
    IVShmemState *s = opaque;
    int ret;

    IVSHMEM_DPRINTF("ivshmem_load\n");

    if (version_id > 0) {
        return -EINVAL;
    }

    if (s->role_val == IVSHMEM_PEER) {
        fprintf(stderr, "ivshmem: 'peer' devices are not migratable\n");
        return -EINVAL;
    }

    ret = pci_device_load(&s->dev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        int i;

        msix_load(&s->dev, f);
        for (i = 0; i < s->vectors; i++) {
            msix_vector_use(&s->dev, i);
        }
    } else {
        s->intrstatus = qemu_get_be32(f);
        s->intrmask = qemu_get_be32(f);
    }

    return 0;
}
667

  
668
/* qdev init: configure PCI config space and BAR0 (registers), then attach
 * the shared memory either via the ivshmem server (chardev given) or by
 * shm_open'ing the object directly. */
static int pci_ivshmem_init(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
    uint8_t *pci_conf;

    if (s->sizearg == NULL) {
        s->ivshmem_size = 4 << 20; /* 4 MB default */
    } else {
        s->ivshmem_size = ivshmem_get_size(s);
    }

    register_savevm(&s->dev.qdev, "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
                                                                        dev);

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        fprintf(stderr, "ivshmem: ioeventfd/irqfd requires MSI\n");
        exit(1);
    }

    /* check that role is reasonable */
    if (s->role) {
        if (strncmp(s->role, "peer", 5) == 0) {
            s->role_val = IVSHMEM_PEER;
        } else if (strncmp(s->role, "master", 7) == 0) {
            s->role_val = IVSHMEM_MASTER;
        } else {
            fprintf(stderr, "ivshmem: 'role' must be 'peer' or 'master'\n");
            exit(1);
        }
    } else {
        s->role_val = IVSHMEM_MASTER; /* default */
    }

    if (s->role_val == IVSHMEM_PEER) {
        register_device_unmigratable(&s->dev.qdev, "ivshmem", s);
    }

    pci_conf = s->dev.config;
    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT_QUMRANET);
    pci_conf[0x02] = 0x10;      /* device ID 0x1110 (low byte) */
    pci_conf[0x03] = 0x11;      /* device ID 0x1110 (high byte) */
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
    pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_RAM);
    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL;

    pci_config_set_interrupt_pin(pci_conf, 1);

    s->shm_pci_addr = 0;
    s->ivshmem_offset = 0;
    s->shm_fd = 0;

    s->ivshmem_mmio_io_addr = cpu_register_io_memory(ivshmem_mmio_read,
                                    ivshmem_mmio_write, s);
    /* region for registers */
    pci_register_bar(&s->dev, 0, IVSHMEM_REG_BAR_SIZE,
                           PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_mmio_map);

    if ((s->server_chr != NULL) &&
                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
        /* if we get a UNIX socket as the parameter we will talk
         * to the ivshmem server to receive the memory region */

        if (s->shmobj != NULL) {
            fprintf(stderr, "WARNING: do not specify both 'chardev' "
                                                "and 'shm' with ivshmem\n");
        }

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                                                    s->server_chr->filename);

        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
            ivshmem_setup_msi(s);
        }

        /* we allocate enough space for 16 guests and grow as needed */
        s->nb_peers = 16;
        s->vm_id = -1;

        /* allocate/initialize space for interrupt handling */
        s->peers = qemu_mallocz(s->nb_peers * sizeof(Peer));

        pci_register_bar(&s->dev, 2, s->ivshmem_size,
                                PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map);

        s->eventfd_chr = qemu_mallocz(s->vectors * sizeof(CharDriverState *));

        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
                     ivshmem_event, s);
    } else {
        /* just map the file immediately, we're not using a server */
        int fd;

        /* FIX: this used to be a warning only and fell through into
         * shm_open(NULL); a missing shm object is fatal */
        if (s->shmobj == NULL) {
            fprintf(stderr, "Must specify 'chardev' or 'shm' to ivshmem\n");
            exit(1);
        }

        IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);

        /* try opening with O_EXCL and if it succeeds zero the memory
         * by truncating to 0.  FIX: accept fd 0 ('>= 0', was '> 0'). */
        if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
                        S_IRWXU|S_IRWXG|S_IRWXO)) >= 0) {
           /* truncate file to length PCI device's memory */
            if (ftruncate(fd, s->ivshmem_size) != 0) {
                fprintf(stderr, "ivshmem: could not truncate shared file\n");
            }

        } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                        S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
            fprintf(stderr, "ivshmem: could not open shared file\n");
            exit(-1);

        }

        if (check_shm_size(s, fd) == -1) {
            exit(-1);
        }

        create_shared_memory_BAR(s, fd);

    }

    return 0;
}
794

  
795
/* qdev teardown: release the MMIO region and the savevm registration. */
static int pci_ivshmem_uninit(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);

    cpu_unregister_io_memory(s->ivshmem_mmio_io_addr);
    unregister_savevm(&dev->qdev, "ivshmem", s);

    return 0;
}
804

  
805
static PCIDeviceInfo ivshmem_info = {
806
    .qdev.name  = "ivshmem",
807
    .qdev.size  = sizeof(IVShmemState),
808
    .qdev.reset = ivshmem_reset,
809
    .init       = pci_ivshmem_init,
810
    .exit       = pci_ivshmem_uninit,
811
    .qdev.props = (Property[]) {
812
        DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
813
        DEFINE_PROP_STRING("size", IVShmemState, sizearg),
814
        DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
815
        DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false),
816
        DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
817
        DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
818
        DEFINE_PROP_STRING("role", IVShmemState, role),
819
        DEFINE_PROP_END_OF_LIST(),
820
    }
821
};
822

  
823
static void ivshmem_register_devices(void)
824
{
825
    pci_qdev_register(&ivshmem_info);
826
}
827

  
828
device_init(ivshmem_register_devices)
b/qemu-char.c
2087 2087
    }
2088 2088
}
2089 2089

  
2090
/* Wrap an eventfd in a character device so the normal chardev handler
 * machinery can poll it (used by ivshmem for interrupt delivery). */
CharDriverState *qemu_chr_open_eventfd(int eventfd){
    return qemu_chr_open_fd(eventfd, eventfd);
}
2095

  
2090 2096
static void tcp_chr_connect(void *opaque)
2091 2097
{
2092 2098
    CharDriverState *chr = opaque;
b/qemu-char.h
94 94
void qemu_chr_info(Monitor *mon, QObject **ret_data);
95 95
CharDriverState *qemu_chr_find(const char *name);
96 96

  
97
/* add an eventfd to the qemu devices that are polled */
98
CharDriverState *qemu_chr_open_eventfd(int eventfd);
99

  
97 100
extern int term_escape_char;
98 101

  
99 102
/* async I/O support */
b/qemu-doc.texi
706 706
that span several QEMU instances. See @ref{sec_invocation} to have a
707 707
basic example.
708 708

  
709
@section Other Devices
710

  
711
@subsection Inter-VM Shared Memory device
712

  
713
With KVM enabled on a Linux host, a shared memory device is available.  Guests
714
map a POSIX shared memory region into the guest as a PCI device that enables
715
zero-copy communication to the application level of the guests.  The basic
716
syntax is:
717

  
718
@example
719
qemu -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
720
@end example
721

  
722
If desired, interrupts can be sent between guest VMs accessing the same shared
723
memory region.  Interrupt support requires using a shared memory server and
724
using a chardev socket to connect to it.  The code for the shared memory server
725
is qemu.git/contrib/ivshmem-server.  An example syntax when using the shared
726
memory server is:
727

  
728
@example
729
qemu -device ivshmem,size=<size in format accepted by -m>[,chardev=<id>]
730
                        [,msi=on][,ioeventfd=on][,vectors=n][,role=peer|master]
731
qemu -chardev socket,path=<path>,id=<id>
732
@end example
733

  
734
When using the server, the guest will be assigned a VM ID (>=0) that allows guests
735
using the same server to communicate via interrupts.  Guests can read their
736
VM ID from a device register (see example code).  Since receiving the shared
737
memory region from the server is asynchronous, there is a (small) chance the
738
guest may boot before the shared memory is attached.  To allow an application
739
to ensure shared memory is attached, the VM ID register will return -1 (an
740
invalid VM ID) until the memory is attached.  Once the shared memory is
741
attached, the VM ID will return the guest's valid VM ID.  With these semantics,
742
the guest application can check to ensure the shared memory is attached to the
743
guest before proceeding.
744

  
745
The @option{role} argument can be set to either master or peer and will affect
746
how the shared memory is migrated.  With @option{role=master}, the guest will
747
copy the shared memory on migration to the destination host.  With
748
@option{role=peer}, the guest will not be able to migrate with the device attached.
749
With the @option{peer} case, the device should be detached and then reattached
750
after migration using the PCI hotplug support.
751

  
709 752
@node direct_linux_boot
710 753
@section Direct Linux Boot
711 754

  

Also available in: Unified diff