Statistics
| Branch: | Revision:

root / hw / ivshmem.c @ 43ad7e3e

History | View | Annotate | Download (23 kB)

/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 *      and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 */
16
#include "hw.h"
17
#include "pc.h"
18
#include "pci.h"
19
#include "msix.h"
20
#include "kvm.h"
21

    
22
#include <sys/mman.h>
23
#include <sys/types.h>
24

    
25
/* Bit numbers inside IVShmemState.features (see ivshmem_has_feature()). */
#define IVSHMEM_IOEVENTFD       0
#define IVSHMEM_MSI             1

/* Values stored in IVShmemState.role_val. */
#define IVSHMEM_PEER            0
#define IVSHMEM_MASTER          1

/* Size in bytes of the register BAR (BAR 0). */
#define IVSHMEM_REG_BAR_SIZE    0x100
32

    
33
/* Define DEBUG_IVSHMEM to make IVSHMEM_DPRINTF() print to stdout; otherwise
 * the macro expands to nothing and its arguments are never evaluated. */
//#define DEBUG_IVSHMEM
#if defined(DEBUG_IVSHMEM)
#  define IVSHMEM_DPRINTF(fmt, ...)                     \
    do {                                                \
        printf("IVSHMEM: " fmt, ## __VA_ARGS__);        \
    } while (0)
#else
#  define IVSHMEM_DPRINTF(fmt, ...)
#endif
40

    
41
/* Eventfds received from the server for one VM position. */
struct Peer {
    int nb_eventfds;    /* number of entries currently stored in eventfds */
    int *eventfds;      /* heap array of eventfd descriptors */
};
typedef struct Peer Peer;
45

    
46
typedef struct EventfdEntry {
47
    PCIDevice *pdev;
48
    int vector;
49
} EventfdEntry;
50

    
51
typedef struct IVShmemState {
52
    PCIDevice dev;
53
    uint32_t intrmask;
54
    uint32_t intrstatus;
55
    uint32_t doorbell;
56

    
57
    CharDriverState **eventfd_chr;
58
    CharDriverState *server_chr;
59
    int ivshmem_mmio_io_addr;
60

    
61
    pcibus_t mmio_addr;
62
    pcibus_t shm_pci_addr;
63
    uint64_t ivshmem_offset;
64
    uint64_t ivshmem_size; /* size of shared memory region */
65
    int shm_fd; /* shared memory file descriptor */
66

    
67
    Peer *peers;
68
    int nb_peers; /* how many guests we have space for */
69
    int max_peer; /* maximum numbered peer */
70

    
71
    int vm_id;
72
    uint32_t vectors;
73
    uint32_t features;
74
    EventfdEntry *eventfd_table;
75

    
76
    char * shmobj;
77
    char * sizearg;
78
    char * role;
79
    int role_val;   /* scalar to avoid multiple string comparisons */
80
} IVShmemState;
81

    
82
/* Byte offsets of the Inter-VM shared memory device registers inside the
 * register BAR. */
enum ivshmem_registers {
    INTRMASK    = 0x00,
    INTRSTATUS  = 0x04,
    IVPOSITION  = 0x08,
    DOORBELL    = 0x0c,
};
89

    
90
/* Return a non-zero value when bit number 'feature' is set in
 * ivs->features, and 0 otherwise. */
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature)
{
    uint32_t mask = 1 << feature;

    return ivs->features & mask;
}
94

    
95
/* Return true when x has exactly one bit set.
 *
 * Zero is not a power of two: without the explicit check the bit trick
 * (x & (x - 1)) == 0 would also accept 0. */
static inline bool is_power_of_two(uint64_t x)
{
    return x != 0 && (x & (x - 1)) == 0;
}
98

    
99
/* BAR 2 map callback: remember where the shared memory BAR was placed and,
 * if the shared memory has already been allocated (non-zero
 * ivshmem_offset), register it at that address. */
static void ivshmem_map(PCIDevice *pci_dev, int region_num,
                    pcibus_t addr, pcibus_t size, int type)
{
    IVShmemState *state = DO_UPCAST(IVShmemState, dev, pci_dev);

    state->shm_pci_addr = addr;

    if (state->ivshmem_offset != 0) {
        cpu_register_physical_memory(state->shm_pci_addr,
                                     state->ivshmem_size,
                                     state->ivshmem_offset);
    }

    IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %"
        PRIu64 ", size = %" FMT_PCIBUS "\n", addr, state->ivshmem_offset, size);
}
115

    
116
/* accessing registers - based on rtl8139 */
117
/* Raise s->dev.irq[0] when any status bit is enabled by the mask, lower it
 * otherwise.  'val' is not used. */
static void ivshmem_update_irq(IVShmemState *s, int val)
{
    int pending = (s->intrstatus & s->intrmask) & 0xffffffff;

    /* don't print ISR resets */
    if (pending != 0) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
           pending ? 1 : 0, s->intrstatus, s->intrmask);
    }

    qemu_set_irq(s->dev.irq[0], (pending != 0));
}
130

    
131
/* Store a new interrupt mask and re-evaluate the interrupt line. */
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
    ivshmem_update_irq(s, val);
}
139

    
140
/* Return the current interrupt mask; reading has no side effect on it. */
static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t mask = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", mask);
    return mask;
}
148

    
149
/* Replace the interrupt status with 'val' and re-evaluate the interrupt
 * line. */
static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
    ivshmem_update_irq(s, val);
}
158

    
159
/* Return the interrupt status as it was before the read.  Reading the ISR
 * clears all interrupts, so the status is zeroed and the interrupt line is
 * re-evaluated before returning. */
static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t previous = s->intrstatus;

    s->intrstatus = 0;
    ivshmem_update_irq(s, 0);

    return previous;
}
170

    
171
/* 16-bit register writes are not supported: the value is discarded and only
 * a debug message is emitted. */
static void ivshmem_io_writew(void *opaque, target_phys_addr_t addr,
                              uint32_t val)
{
    IVSHMEM_DPRINTF("We shouldn't be writing words\n");
}
177

    
178
/* 32-bit register write handler.
 *
 * opaque: the IVShmemState of the device.
 * addr:   register offset; only bits 2..7 are kept, so the access is
 *         treated as aligned to a 4-byte register.
 * val:    value written.  For DOORBELL the upper 16 bits select the
 *         destination VM and the low 8 bits select the vector.
 *
 * A doorbell write signals the destination by writing the 64-bit value 1
 * to the matching eventfd.  Doorbells aimed at an unknown VM or vector are
 * ignored. */
static void ivshmem_io_writel(void *opaque, target_phys_addr_t addr,
                                                            uint32_t val)
{
    IVShmemState *s = opaque;

    uint64_t write_one = 1;
    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable; the peer table only
             * exists when the device talks to a server, so a missing table
             * means there is nobody to notify */
            if (s->peers == NULL || dest > s->max_peer) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
                                                    write_one, dest, vector);
                if (write(s->peers[dest].eventfds[vector],
                          &write_one, sizeof(write_one)) != sizeof(write_one)) {
                    IVSHMEM_DPRINTF("error writing to eventfd\n");
                }
            }
            break;

        default:
            /* not a doorbell problem: the offset matches no register */
            IVSHMEM_DPRINTF("Invalid register write at addr " TARGET_FMT_plx
                            "\n", addr);
            break;
    }
}
221

    
222
/* 8-bit register writes are not supported: the value is discarded and only
 * a debug message is emitted. */
static void ivshmem_io_writeb(void *opaque, target_phys_addr_t addr,
                              uint32_t val)
{
    IVSHMEM_DPRINTF("We shouldn't be writing bytes\n");
}
227

    
228
/* 16-bit register reads are not supported: always returns 0 after emitting
 * a debug message. */
static uint32_t ivshmem_io_readw(void *opaque, target_phys_addr_t addr)
{
    IVSHMEM_DPRINTF("We shouldn't be reading words\n");

    return 0;
}
234

    
235
/* 32-bit register read handler.
 *
 * INTRMASK and INTRSTATUS are forwarded to their accessors (reading
 * INTRSTATUS clears it).  IVPOSITION yields the VM id once a shared memory
 * fd has been stored, and -1 before that.  Any other offset reads as 0. */
static uint32_t ivshmem_io_readl(void *opaque, target_phys_addr_t addr)
{
    IVShmemState *state = opaque;

    if (addr == INTRMASK) {
        return ivshmem_IntrMask_read(state);
    }

    if (addr == INTRSTATUS) {
        return ivshmem_IntrStatus_read(state);
    }

    if (addr == IVPOSITION) {
        /* return my VM ID if the memory is mapped */
        if (state->shm_fd > 0) {
            return state->vm_id;
        }
        return -1;
    }

    IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
    return 0;
}
267

    
268
/* 8-bit register reads are not supported: always returns 0 after emitting
 * a debug message. */
static uint32_t ivshmem_io_readb(void *opaque, target_phys_addr_t addr)
{
    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
    return 0;
}
274

    
275
static CPUReadMemoryFunc * const ivshmem_mmio_read[3] = {
276
    ivshmem_io_readb,
277
    ivshmem_io_readw,
278
    ivshmem_io_readl,
279
};
280

    
281
static CPUWriteMemoryFunc * const ivshmem_mmio_write[3] = {
282
    ivshmem_io_writeb,
283
    ivshmem_io_writew,
284
    ivshmem_io_writel,
285
};
286

    
287
static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
288
{
289
    IVShmemState *s = opaque;
290

    
291
    ivshmem_IntrStatus_write(s, *buf);
292

    
293
    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
294
}
295

    
296
/* Can-read callback registered with qemu_chr_add_handlers(): always
 * reports 8.  NOTE(review): presumably the number of bytes the handler is
 * willing to accept per call - confirm against the char layer. */
static int ivshmem_can_receive(void * opaque)
{
    const int can_read = 8;

    return can_read;
}
300

    
301
/* Char-device event callback: events are only traced, never acted upon. */
static void ivshmem_event(void *opaque, int event)
{
    (void)opaque;

    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
}
305

    
306
static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
307

    
308
    EventfdEntry *entry = opaque;
309
    PCIDevice *pdev = entry->pdev;
310

    
311
    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
312
    msix_notify(pdev, entry->vector);
313
}
314

    
315
static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd,
316
                                                                    int vector)
317
{
318
    /* create a event character device based on the passed eventfd */
319
    IVShmemState *s = opaque;
320
    CharDriverState * chr;
321

    
322
    chr = qemu_chr_open_eventfd(eventfd);
323

    
324
    if (chr == NULL) {
325
        fprintf(stderr, "creating eventfd for eventfd %d failed\n", eventfd);
326
        exit(-1);
327
    }
328

    
329
    /* if MSI is supported we need multiple interrupts */
330
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
331
        s->eventfd_table[vector].pdev = &s->dev;
332
        s->eventfd_table[vector].vector = vector;
333

    
334
        qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
335
                      ivshmem_event, &s->eventfd_table[vector]);
336
    } else {
337
        qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
338
                      ivshmem_event, s);
339
    }
340

    
341
    return chr;
342

    
343
}
344

    
345
/* Check that the guest isn't going to try and map more memory than the
 * shared object behind 'fd' has allocated.
 *
 * Returns 0 when the object is large enough for s->ivshmem_size, and -1
 * (after printing a message) when it is too small or cannot be examined. */
static int check_shm_size(IVShmemState *s, int fd)
{
    struct stat buf;

    /* without this check a failed fstat() would leave buf uninitialised */
    if (fstat(fd, &buf) < 0) {
        fprintf(stderr, "IVSHMEM ERROR: could not stat shared object: %s\n",
                strerror(errno));
        return -1;
    }

    if (s->ivshmem_size > (uint64_t)buf.st_size) {
        fprintf(stderr,
                "IVSHMEM ERROR: Requested memory size greater"
                " than shared object size (%" PRIu64 " > %" PRIu64")\n",
                s->ivshmem_size, (uint64_t)buf.st_size);
        return -1;
    }

    return 0;
}
363

    
364
/* create the shared memory BAR when we are not using the server, so we can
365
 * create the BAR and map the memory immediately */
366
static void create_shared_memory_BAR(IVShmemState *s, int fd) {
367

    
368
    void * ptr;
369

    
370
    s->shm_fd = fd;
371

    
372
    ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
373

    
374
    s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev, "ivshmem.bar2",
375
                                                        s->ivshmem_size, ptr);
376

    
377
    /* region for shared memory */
378
    pci_register_bar(&s->dev, 2, s->ivshmem_size,
379
                                PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map);
380
}
381

    
382
/* Release every eventfd stored for the peer at position 'posn'.
 *
 * Each descriptor is closed, and first unregistered as an ioeventfd when
 * that feature is enabled (it is only ever registered in that case).  The
 * array is freed and the slot is reset to "no eventfds", so the function
 * may safely be called again for the same position. */
static void close_guest_eventfds(IVShmemState *s, int posn)
{
    int i, guest_curr_max;

    guest_curr_max = s->peers[posn].nb_eventfds;

    for (i = 0; i < guest_curr_max; i++) {
        if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
            kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[i],
                        s->mmio_addr + DOORBELL, (posn << 16) | i, 0);
        }
        close(s->peers[posn].eventfds[i]);
    }

    qemu_free(s->peers[posn].eventfds);
    /* do not leave a dangling pointer behind: it would be freed a second
     * time if this position is closed again */
    s->peers[posn].eventfds = NULL;
    s->peers[posn].nb_eventfds = 0;
}
397

    
398
/* Register every known peer eventfd as an ioeventfd on the DOORBELL
 * register, using (peer << 16) | vector as the matched value.
 *
 * Does nothing when no peer table has been allocated, which is the case
 * when the device is not connected to a server. */
static void setup_ioeventfds(IVShmemState *s)
{
    int i, j;

    if (s->peers == NULL) {
        return;
    }

    for (i = 0; i <= s->max_peer; i++) {
        for (j = 0; j < s->peers[i].nb_eventfds; j++) {
            kvm_set_ioeventfd_mmio_long(s->peers[i].eventfds[j],
                    s->mmio_addr + DOORBELL, (i << 16) | j, 1);
        }
    }
}
409

    
410
/* this function increase the dynamic storage need to store data about other
411
 * guests */
412
static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
413

    
414
    int j, old_nb_alloc;
415

    
416
    old_nb_alloc = s->nb_peers;
417

    
418
    while (new_min_size >= s->nb_peers)
419
        s->nb_peers = s->nb_peers * 2;
420

    
421
    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
422
    s->peers = qemu_realloc(s->peers, s->nb_peers * sizeof(Peer));
423

    
424
    /* zero out new pointers */
425
    for (j = old_nb_alloc; j < s->nb_peers; j++) {
426
        s->peers[j].eventfds = NULL;
427
        s->peers[j].nb_eventfds = 0;
428
    }
429
}
430

    
431
static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
432
{
433
    IVShmemState *s = opaque;
434
    int incoming_fd, tmp_fd;
435
    int guest_max_eventfd;
436
    long incoming_posn;
437

    
438
    memcpy(&incoming_posn, buf, sizeof(long));
439
    /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
440
    tmp_fd = qemu_chr_get_msgfd(s->server_chr);
441
    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
442

    
443
    /* make sure we have enough space for this guest */
444
    if (incoming_posn >= s->nb_peers) {
445
        increase_dynamic_storage(s, incoming_posn);
446
    }
447

    
448
    if (tmp_fd == -1) {
449
        /* if posn is positive and unseen before then this is our posn*/
450
        if ((incoming_posn >= 0) &&
451
                            (s->peers[incoming_posn].eventfds == NULL)) {
452
            /* receive our posn */
453
            s->vm_id = incoming_posn;
454
            return;
455
        } else {
456
            /* otherwise an fd == -1 means an existing guest has gone away */
457
            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
458
            close_guest_eventfds(s, incoming_posn);
459
            return;
460
        }
461
    }
462

    
463
    /* because of the implementation of get_msgfd, we need a dup */
464
    incoming_fd = dup(tmp_fd);
465

    
466
    if (incoming_fd == -1) {
467
        fprintf(stderr, "could not allocate file descriptor %s\n",
468
                                                            strerror(errno));
469
        return;
470
    }
471

    
472
    /* if the position is -1, then it's shared memory region fd */
473
    if (incoming_posn == -1) {
474

    
475
        void * map_ptr;
476

    
477
        s->max_peer = 0;
478

    
479
        if (check_shm_size(s, incoming_fd) == -1) {
480
            exit(-1);
481
        }
482

    
483
        /* mmap the region and map into the BAR2 */
484
        map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
485
                                                            incoming_fd, 0);
486
        s->ivshmem_offset = qemu_ram_alloc_from_ptr(&s->dev.qdev,
487
                                    "ivshmem.bar2", s->ivshmem_size, map_ptr);
488

    
489
        IVSHMEM_DPRINTF("guest pci addr = %" FMT_PCIBUS ", guest h/w addr = %"
490
                         PRIu64 ", size = %" PRIu64 "\n", s->shm_pci_addr,
491
                         s->ivshmem_offset, s->ivshmem_size);
492

    
493
        if (s->shm_pci_addr > 0) {
494
            /* map memory into BAR2 */
495
            cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size,
496
                                                            s->ivshmem_offset);
497
        }
498

    
499
        /* only store the fd if it is successfully mapped */
500
        s->shm_fd = incoming_fd;
501

    
502
        return;
503
    }
504

    
505
    /* each guest has an array of eventfds, and we keep track of how many
506
     * guests for each VM */
507
    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;
508

    
509
    if (guest_max_eventfd == 0) {
510
        /* one eventfd per MSI vector */
511
        s->peers[incoming_posn].eventfds = (int *) qemu_malloc(s->vectors *
512
                                                                sizeof(int));
513
    }
514

    
515
    /* this is an eventfd for a particular guest VM */
516
    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
517
                                            guest_max_eventfd, incoming_fd);
518
    s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd;
519

    
520
    /* increment count for particular guest */
521
    s->peers[incoming_posn].nb_eventfds++;
522

    
523
    /* keep track of the maximum VM ID */
524
    if (incoming_posn > s->max_peer) {
525
        s->max_peer = incoming_posn;
526
    }
527

    
528
    if (incoming_posn == s->vm_id) {
529
        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
530
                   s->peers[s->vm_id].eventfds[guest_max_eventfd],
531
                   guest_max_eventfd);
532
    }
533

    
534
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
535
        if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL,
536
                        (incoming_posn << 16) | guest_max_eventfd, 1) < 0) {
537
            fprintf(stderr, "ivshmem: ioeventfd not available\n");
538
        }
539
    }
540

    
541
    return;
542
}
543

    
544
/* qdev reset hook: clears the interrupt status register. */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *state = DO_UPCAST(IVShmemState, dev.qdev, d);

    state->intrstatus = 0;
}
551

    
552
/* BAR 0 map callback: remember where the register BAR was placed, attach
 * the register handlers there and, with the ioeventfd feature, register
 * the known eventfds on the doorbell. */
static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
                       pcibus_t addr, pcibus_t size, int type)
{
    IVShmemState *state = DO_UPCAST(IVShmemState, dev, pci_dev);

    state->mmio_addr = addr;
    cpu_register_physical_memory(addr + 0, IVSHMEM_REG_BAR_SIZE,
                                 state->ivshmem_mmio_io_addr);

    if (!ivshmem_has_feature(state, IVSHMEM_IOEVENTFD)) {
        return;
    }

    setup_ioeventfds(state);
}
565

    
566
/* Parse the 'size' property into a byte count.
 *
 * The string is a decimal number optionally followed by 'M'/'m'
 * (megabytes, also the default when there is no suffix) or 'G'/'g'
 * (gigabytes).
 *
 * Returns the size in bytes.  Terminates the process when the suffix is
 * unknown, when the size is zero or does not fit in 64 bits, or when it is
 * not a power of two (BARs must be a power of 2). */
static uint64_t ivshmem_get_size(IVShmemState * s)
{
    uint64_t value;
    unsigned int shift;
    char *ptr;

    value = strtoull(s->sizearg, &ptr, 10);
    switch (*ptr) {
        case 0: case 'M': case 'm':
            shift = 20;
            break;
        case 'G': case 'g':
            shift = 30;
            break;
        default:
            fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
            exit(1);
    }

    /* an empty or zero size is useless, and a size whose top bits would be
     * shifted out must not silently turn into a smaller one */
    if (value == 0 || value > (~(uint64_t)0 >> shift)) {
        fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
        exit(1);
    }
    value <<= shift;

    /* BARs must be a power of 2 */
    if (!is_power_of_two(value)) {
        fprintf(stderr, "ivshmem: size must be power of 2\n");
        exit(1);
    }

    return value;
}
592

    
593
/* Set up MSI-X with s->vectors vectors: initialise MSI-X, expose its table
 * through BAR 1, mark every vector as used and allocate the per-vector
 * eventfd table.  Terminates the process when msix_init() fails. */
static void ivshmem_setup_msi(IVShmemState * s)
{
    int vec;

    /* allocate the MSI-X vectors */
    if (msix_init(&s->dev, s->vectors, 1, 0)) {
        IVSHMEM_DPRINTF("msix initialization failed\n");
        exit(1);
    }

    pci_register_bar(&s->dev, 1,
                     msix_bar_size(&s->dev),
                     PCI_BASE_ADDRESS_SPACE_MEMORY,
                     msix_mmio_map);
    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* 'activate' the vectors */
    for (vec = 0; vec < s->vectors; vec++) {
        msix_vector_use(&s->dev, vec);
    }

    /* allocate Qemu char devices for receiving interrupts */
    s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
}
618

    
619
/* savevm handler: writes the PCI device state, followed by either the
 * MSI-X state (MSI enabled) or the interrupt status and mask registers. */
static void ivshmem_save(QEMUFile* f, void *opaque)
{
    IVShmemState *state = opaque;

    IVSHMEM_DPRINTF("ivshmem_save\n");
    pci_device_save(&state->dev, f);

    if (!ivshmem_has_feature(state, IVSHMEM_MSI)) {
        qemu_put_be32(f, state->intrstatus);
        qemu_put_be32(f, state->intrmask);
        return;
    }

    msix_save(&state->dev, f);
}
634

    
635
/* loadvm handler, the counterpart of ivshmem_save().
 *
 * Returns 0 on success, -EINVAL for a version newer than 0 or when the
 * device has the 'peer' role, or the error reported by pci_device_load(). */
static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
{
    IVShmemState *state = opaque;
    int rc, vec;

    IVSHMEM_DPRINTF("ivshmem_load\n");

    if (version_id > 0) {
        return -EINVAL;
    }

    if (state->role_val == IVSHMEM_PEER) {
        fprintf(stderr, "ivshmem: 'peer' devices are not migratable\n");
        return -EINVAL;
    }

    rc = pci_device_load(&state->dev, f);
    if (rc != 0) {
        return rc;
    }

    if (!ivshmem_has_feature(state, IVSHMEM_MSI)) {
        /* same order as written by ivshmem_save() */
        state->intrstatus = qemu_get_be32(f);
        state->intrmask = qemu_get_be32(f);
        return 0;
    }

    msix_load(&state->dev, f);
    for (vec = 0; vec < state->vectors; vec++) {
        msix_vector_use(&state->dev, vec);
    }

    return 0;
}
668

    
669
/* PCI init hook of the ivshmem device.
 *
 * Parses the 'size' and 'role' properties, fills in the PCI configuration
 * space, registers the register BAR (BAR 0) and then obtains the shared
 * memory in one of two ways:
 *   - with a 'chardev' whose filename starts with "unix:", the memory and
 *     the eventfds are delivered later by the server (see ivshmem_read());
 *   - otherwise the POSIX shared memory object named by 'shm' is opened
 *     (and created if needed) and mapped immediately.
 *
 * Returns 0.  Configuration errors terminate the process. */
static int pci_ivshmem_init(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
    uint8_t *pci_conf;

    if (s->sizearg == NULL) {
        s->ivshmem_size = 4 << 20; /* 4 MB default */
    } else {
        s->ivshmem_size = ivshmem_get_size(s);
    }

    register_savevm(&s->dev.qdev, "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
                                                                        dev);

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        fprintf(stderr, "ivshmem: ioeventfd/irqfd requires MSI\n");
        exit(1);
    }

    /* check that role is reasonable */
    if (s->role) {
        if (strncmp(s->role, "peer", 5) == 0) {
            s->role_val = IVSHMEM_PEER;
        } else if (strncmp(s->role, "master", 7) == 0) {
            s->role_val = IVSHMEM_MASTER;
        } else {
            fprintf(stderr, "ivshmem: 'role' must be 'peer' or 'master'\n");
            exit(1);
        }
    } else {
        s->role_val = IVSHMEM_MASTER; /* default */
    }

    if (s->role_val == IVSHMEM_PEER) {
        register_device_unmigratable(&s->dev.qdev, "ivshmem", s);
    }

    pci_conf = s->dev.config;
    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT_QUMRANET);
    pci_conf[0x02] = 0x10;
    pci_conf[0x03] = 0x11;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
    pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_RAM);
    pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL;

    pci_config_set_interrupt_pin(pci_conf, 1);

    s->shm_pci_addr = 0;
    s->ivshmem_offset = 0;
    s->shm_fd = 0;

    s->ivshmem_mmio_io_addr = cpu_register_io_memory(ivshmem_mmio_read,
                                    ivshmem_mmio_write, s);
    /* region for registers*/
    pci_register_bar(&s->dev, 0, IVSHMEM_REG_BAR_SIZE,
                           PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_mmio_map);

    if ((s->server_chr != NULL) &&
                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
        /* if we get a UNIX socket as the parameter we will talk
         * to the ivshmem server to receive the memory region */

        if (s->shmobj != NULL) {
            fprintf(stderr, "WARNING: do not specify both 'chardev' "
                                                "and 'shm' with ivshmem\n");
        }

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                                                    s->server_chr->filename);

        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
            ivshmem_setup_msi(s);
        }

        /* we allocate enough space for 16 guests and grow as needed */
        s->nb_peers = 16;
        s->vm_id = -1;

        /* allocate/initialize space for interrupt handling */
        s->peers = qemu_mallocz(s->nb_peers * sizeof(Peer));

        pci_register_bar(&s->dev, 2, s->ivshmem_size,
                                PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map);

        s->eventfd_chr = qemu_mallocz(s->vectors * sizeof(CharDriverState *));

        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
                     ivshmem_event, s);
    } else {
        /* just map the file immediately, we're not using a server */
        int fd;

        if (s->shmobj == NULL) {
            /* nothing to open: stop here instead of passing NULL to
             * shm_open() */
            fprintf(stderr, "Must specify 'chardev' or 'shm' to ivshmem\n");
            exit(1);
        }

        IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);

        /* try opening with O_EXCL and if it succeeds zero the memory
         * by truncating to 0; any non-negative descriptor is a success */
        if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
                        S_IRWXU|S_IRWXG|S_IRWXO)) >= 0) {
           /* truncate file to length PCI device's memory */
            if (ftruncate(fd, s->ivshmem_size) != 0) {
                fprintf(stderr, "ivshmem: could not truncate shared file\n");
            }

        } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                        S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
            fprintf(stderr, "ivshmem: could not open shared file\n");
            exit(-1);

        }

        if (check_shm_size(s, fd) == -1) {
            exit(-1);
        }

        create_shared_memory_BAR(s, fd);

    }

    return 0;
}
795

    
796
/* PCI exit hook: releases the register I/O memory handle and removes the
 * savevm registration.  Always returns 0. */
static int pci_ivshmem_uninit(PCIDevice *dev)
{
    IVShmemState *state = DO_UPCAST(IVShmemState, dev, dev);

    cpu_unregister_io_memory(state->ivshmem_mmio_io_addr);
    unregister_savevm(&dev->qdev, "ivshmem", state);

    return 0;
}
805

    
806
static PCIDeviceInfo ivshmem_info = {
807
    .qdev.name  = "ivshmem",
808
    .qdev.size  = sizeof(IVShmemState),
809
    .qdev.reset = ivshmem_reset,
810
    .init       = pci_ivshmem_init,
811
    .exit       = pci_ivshmem_uninit,
812
    .qdev.props = (Property[]) {
813
        DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
814
        DEFINE_PROP_STRING("size", IVShmemState, sizearg),
815
        DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
816
        DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false),
817
        DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
818
        DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
819
        DEFINE_PROP_STRING("role", IVShmemState, role),
820
        DEFINE_PROP_END_OF_LIST(),
821
    }
822
};
823

    
824
static void ivshmem_register_devices(void)
825
{
826
    pci_qdev_register(&ivshmem_info);
827
}
828

    
829
device_init(ivshmem_register_devices)