hw/ivshmem.c @ ac791b88

/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 *      and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
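
/*
 * Usage sketch (based on the qdev properties registered at the bottom of
 * this file; exact command-line syntax depends on the QEMU version):
 *
 *   mapping a POSIX shared memory object directly, no server:
 *     -device ivshmem,shm=ivshmem_obj,size=16M
 *
 *   with the ivshmem server, which also hands out doorbell eventfds
 *   (the chardev must be a "unix:" socket; see pci_ivshmem_init()):
 *     -chardev socket,path=/tmp/ivshmem_socket,id=ivs
 *     -device ivshmem,chardev=ivs,size=16M,role=peer
 *
 * BAR0 holds the registers below, BAR1 the MSI-X table when "msi" is on,
 * and BAR2 maps the shared memory itself. "size" defaults to 4 MB and
 * must be a power of two, since it is exposed as a PCI BAR.
 */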
#include "hw.h"
#include "pc.h"
#include "pci.h"
#include "msix.h"
#include "kvm.h"
#include "migration.h"
#include "qerror.h"

#include <sys/mman.h>
#include <sys/types.h>

#define IVSHMEM_IOEVENTFD   0
#define IVSHMEM_MSI     1

#define IVSHMEM_PEER    0
#define IVSHMEM_MASTER  1

#define IVSHMEM_REG_BAR_SIZE 0x100

//#define DEBUG_IVSHMEM
#ifdef DEBUG_IVSHMEM
#define IVSHMEM_DPRINTF(fmt, ...)        \
    do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0)
#else
#define IVSHMEM_DPRINTF(fmt, ...)
#endif

typedef struct Peer {
    int nb_eventfds;
    int *eventfds;
} Peer;

typedef struct EventfdEntry {
    PCIDevice *pdev;
    int vector;
} EventfdEntry;

typedef struct IVShmemState {
    PCIDevice dev;
    uint32_t intrmask;
    uint32_t intrstatus;
    uint32_t doorbell;

    CharDriverState **eventfd_chr;
    CharDriverState *server_chr;
    MemoryRegion ivshmem_mmio;

    pcibus_t mmio_addr;
    /* We might need to register the BAR before we actually have the memory.
     * So prepare a container MemoryRegion for the BAR immediately and
     * add a subregion when we have the memory.
     */
    MemoryRegion bar;
    MemoryRegion ivshmem;
    MemoryRegion msix_bar;
    uint64_t ivshmem_size; /* size of shared memory region */
    int shm_fd; /* shared memory file descriptor */

    Peer *peers;
    int nb_peers; /* how many guests we have space for */
    int max_peer; /* maximum numbered peer */

    int vm_id;
    uint32_t vectors;
    uint32_t features;
    EventfdEntry *eventfd_table;

    Error *migration_blocker;

    char * shmobj;
    char * sizearg;
    char * role;
    int role_val;   /* scalar to avoid multiple string comparisons */
} IVShmemState;

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};
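
/*
 * A doorbell write encodes its destination in the value (see
 * ivshmem_io_write()): bits 31:16 select the target peer ID and the low
 * bits the vector, so writing (2 << 16) | 0 to BAR0 + DOORBELL kicks
 * peer 2 on vector 0.
 */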

static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                                    unsigned int feature) {
    return (ivs->features & (1 << feature));
}

static inline bool is_power_of_two(uint64_t x) {
    return (x & (x - 1)) == 0;
}

/* accessing registers - based on rtl8139 */
static void ivshmem_update_irq(IVShmemState *s, int val)
{
    int isr;
    isr = (s->intrstatus & s->intrmask) & 0xffffffff;

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
           isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    qemu_set_irq(s->dev.irq[0], (isr != 0));
}

static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;

    ivshmem_update_irq(s, val);
}

static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);

    return ret;
}

static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;

    ivshmem_update_irq(s, val);
    return;
}

static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;

    ivshmem_update_irq(s, 0);

    return ret;
}

static void ivshmem_io_write(void *opaque, target_phys_addr_t addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint64_t write_one = 1;
    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest > s->max_peer) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
                                                    write_one, dest, vector);
                if (write(s->peers[dest].eventfds[vector],
                                                    &(write_one), 8) != 8) {
                    IVSHMEM_DPRINTF("error writing to eventfd\n");
                }
            }
            break;
        default:
            IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
    }
}

static uint64_t ivshmem_io_read(void *opaque, target_phys_addr_t addr,
                                unsigned size)
{

    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            /* return my VM ID if the memory is mapped */
            if (s->shm_fd > 0) {
                ret = s->vm_id;
            } else {
                ret = -1;
            }
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;

    ivshmem_IntrStatus_write(s, *buf);

    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
}
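
/*
 * The chardev layer asks how much we can accept per read: eventfd payloads
 * are 64-bit counters (and the server's messages are a long), so 8 bytes.
 */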
static int ivshmem_can_receive(void * opaque)
{
    return 8;
}

static void ivshmem_event(void *opaque, int event)
{
    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
}

static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {

    EventfdEntry *entry = opaque;
    PCIDevice *pdev = entry->pdev;

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
    msix_notify(pdev, entry->vector);
}

static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd,
                                                                    int vector)
{
    /* create an event character device based on the passed eventfd */
    IVShmemState *s = opaque;
    CharDriverState * chr;

    chr = qemu_chr_open_eventfd(eventfd);

    if (chr == NULL) {
        fprintf(stderr, "creating chardev for eventfd %d failed\n", eventfd);
        exit(-1);
    }

    /* if MSI is supported we need multiple interrupts */
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        s->eventfd_table[vector].pdev = &s->dev;
        s->eventfd_table[vector].vector = vector;

        qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
                      ivshmem_event, &s->eventfd_table[vector]);
    } else {
        qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
                      ivshmem_event, s);
    }

    return chr;

}

static int check_shm_size(IVShmemState *s, int fd) {
    /* check that the guest isn't going to try and map more memory than the
     * shared object has allocated; return -1 to indicate error */

    struct stat buf;

    fstat(fd, &buf);

    if (s->ivshmem_size > buf.st_size) {
        fprintf(stderr,
                "IVSHMEM ERROR: Requested memory size greater"
                " than shared object size (%" PRIu64 " > %" PRIu64")\n",
                s->ivshmem_size, (uint64_t)buf.st_size);
        return -1;
    } else {
        return 0;
    }
}

/* create the shared memory BAR when we are not using the server, so we can
 * create the BAR and map the memory immediately */
static void create_shared_memory_BAR(IVShmemState *s, int fd) {

    void * ptr;

    s->shm_fd = fd;

    ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        fprintf(stderr, "ivshmem: mmap failed: %s\n", strerror(errno));
        exit(1);
    }

    memory_region_init_ram_ptr(&s->ivshmem, "ivshmem.bar2",
                               s->ivshmem_size, ptr);
    vmstate_register_ram(&s->ivshmem, &s->dev.qdev);
    memory_region_add_subregion(&s->bar, 0, &s->ivshmem);

    /* region for shared memory */
    pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
}

static void close_guest_eventfds(IVShmemState *s, int posn)
{
    int i, guest_curr_max;

    guest_curr_max = s->peers[posn].nb_eventfds;

    for (i = 0; i < guest_curr_max; i++) {
        kvm_set_ioeventfd_mmio_long(s->peers[posn].eventfds[i],
                    s->mmio_addr + DOORBELL, (posn << 16) | i, 0);
        close(s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

static void setup_ioeventfds(IVShmemState *s) {

    int i, j;

    for (i = 0; i <= s->max_peer; i++) {
        for (j = 0; j < s->peers[i].nb_eventfds; j++) {
            memory_region_add_eventfd(&s->ivshmem_mmio,
                                      DOORBELL,
                                      4,
                                      true,
                                      (i << 16) | j,
                                      s->peers[i].eventfds[j]);
        }
    }
}
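
/*
 * With the ioeventfd feature enabled, doorbell writes are also registered
 * with KVM (see the kvm_set_ioeventfd_mmio_long() calls in ivshmem_read()),
 * so a guest kick can be delivered without exiting to this userspace
 * handler at all.
 */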

/* this function increases the dynamic storage needed to store data about
 * other guests */
static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {

    int j, old_nb_alloc;

    old_nb_alloc = s->nb_peers;

    while (new_min_size >= s->nb_peers) {
        s->nb_peers = s->nb_peers * 2;
    }

    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
    s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));

    /* zero out new pointers */
    for (j = old_nb_alloc; j < s->nb_peers; j++) {
        s->peers[j].eventfds = NULL;
        s->peers[j].nb_eventfds = 0;
    }
}
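
/*
 * Handler for messages from the ivshmem server, as this code expects them:
 * each message is a long (a peer position), optionally accompanied by a
 * file descriptor passed via SCM_RIGHTS.
 *   - position -1 with an fd: the shared memory region to map into BAR2
 *   - an unseen position without an fd: our own VM ID
 *   - a position with an fd: an eventfd for that peer's next free vector
 *   - a known position without an fd: that peer has gone away
 */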
static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
{
    IVShmemState *s = opaque;
    int incoming_fd, tmp_fd;
    int guest_max_eventfd;
    long incoming_posn;

    memcpy(&incoming_posn, buf, sizeof(long));
    /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
    tmp_fd = qemu_chr_fe_get_msgfd(s->server_chr);
    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);

    /* make sure we have enough space for this guest */
    if (incoming_posn >= s->nb_peers) {
        increase_dynamic_storage(s, incoming_posn);
    }

    if (tmp_fd == -1) {
        /* if posn is positive and unseen before then this is our posn */
        if ((incoming_posn >= 0) &&
                            (s->peers[incoming_posn].eventfds == NULL)) {
            /* receive our posn */
            s->vm_id = incoming_posn;
            return;
        } else {
            /* otherwise an fd == -1 means an existing guest has gone away */
            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
            close_guest_eventfds(s, incoming_posn);
            return;
        }
    }

    /* because of the implementation of get_msgfd, we need a dup */
    incoming_fd = dup(tmp_fd);

    if (incoming_fd == -1) {
        fprintf(stderr, "could not allocate file descriptor: %s\n",
                                                            strerror(errno));
        return;
    }

    /* if the position is -1, then it's the shared memory region fd */
    if (incoming_posn == -1) {

        void * map_ptr;

        s->max_peer = 0;

        if (check_shm_size(s, incoming_fd) == -1) {
            exit(-1);
        }

        /* mmap the region and map it into BAR2 */
        map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
                                                            incoming_fd, 0);
        if (map_ptr == MAP_FAILED) {
            fprintf(stderr, "ivshmem: mmap failed: %s\n", strerror(errno));
            exit(1);
        }
        memory_region_init_ram_ptr(&s->ivshmem,
                                   "ivshmem.bar2", s->ivshmem_size, map_ptr);
        vmstate_register_ram(&s->ivshmem, &s->dev.qdev);

        IVSHMEM_DPRINTF("guest h/w addr = %p, size = %" PRIu64 "\n",
                         map_ptr, s->ivshmem_size);

        memory_region_add_subregion(&s->bar, 0, &s->ivshmem);

        /* only store the fd if it is successfully mapped */
        s->shm_fd = incoming_fd;

        return;
    }

    /* each guest has an array of eventfds, one per MSI vector; track how
     * many are in use for each guest */
    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;

    if (guest_max_eventfd == 0) {
        /* one eventfd per MSI vector */
        s->peers[incoming_posn].eventfds = (int *) g_malloc(s->vectors *
                                                                sizeof(int));
    }

    /* this is an eventfd for a particular guest VM */
    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
                                            guest_max_eventfd, incoming_fd);
    s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd;

    /* increment count for particular guest */
    s->peers[incoming_posn].nb_eventfds++;

    /* keep track of the maximum VM ID */
    if (incoming_posn > s->max_peer) {
        s->max_peer = incoming_posn;
    }

    if (incoming_posn == s->vm_id) {
        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
                   s->peers[s->vm_id].eventfds[guest_max_eventfd],
                   guest_max_eventfd);
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        if (kvm_set_ioeventfd_mmio_long(incoming_fd, s->mmio_addr + DOORBELL,
                        (incoming_posn << 16) | guest_max_eventfd, 1) < 0) {
            fprintf(stderr, "ivshmem: ioeventfd not available\n");
        }
    }

    return;
}

static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d);

    s->intrstatus = 0;
    return;
}

static uint64_t ivshmem_get_size(IVShmemState * s) {

    uint64_t value;
    char *ptr;

    value = strtoull(s->sizearg, &ptr, 10);
    switch (*ptr) {
        case 0: case 'M': case 'm':
            value <<= 20;
            break;
        case 'G': case 'g':
            value <<= 30;
            break;
        default:
            fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
            exit(1);
    }

    /* BARs must be a power of 2 */
    if (!is_power_of_two(value)) {
        fprintf(stderr, "ivshmem: size must be power of 2\n");
        exit(1);
    }

    return value;
}
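
/*
 * Size strings accepted by ivshmem_get_size() above, worked through:
 * "16M" parses as 16 << 20 = 0x1000000 bytes, "1G" as 1 << 30, and a
 * bare "64" also means megabytes (the NUL-suffix case).
 */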

static void ivshmem_setup_msi(IVShmemState * s) {

    int i;

    /* allocate the MSI-X vectors */

    memory_region_init(&s->msix_bar, "ivshmem-msix", 4096);
    if (!msix_init(&s->dev, s->vectors, &s->msix_bar, 1, 0)) {
        pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
                         &s->msix_bar);
        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
    } else {
        IVSHMEM_DPRINTF("msix initialization failed\n");
        exit(1);
    }

    /* 'activate' the vectors */
    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(&s->dev, i);
    }

    /* allocate QEMU char devices for receiving interrupts */
    s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
}

static void ivshmem_save(QEMUFile* f, void *opaque)
{
    IVShmemState *proxy = opaque;

    IVSHMEM_DPRINTF("ivshmem_save\n");
    pci_device_save(&proxy->dev, f);

    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
        msix_save(&proxy->dev, f);
    } else {
        qemu_put_be32(f, proxy->intrstatus);
        qemu_put_be32(f, proxy->intrmask);
    }

}

static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
{
    IVSHMEM_DPRINTF("ivshmem_load\n");

    IVShmemState *proxy = opaque;
    int ret, i;

    if (version_id > 0) {
        return -EINVAL;
    }

    if (proxy->role_val == IVSHMEM_PEER) {
        fprintf(stderr, "ivshmem: 'peer' devices are not migratable\n");
        return -EINVAL;
    }

    ret = pci_device_load(&proxy->dev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
        msix_load(&proxy->dev, f);
        for (i = 0; i < proxy->vectors; i++) {
            msix_vector_use(&proxy->dev, i);
        }
    } else {
        proxy->intrstatus = qemu_get_be32(f);
        proxy->intrmask = qemu_get_be32(f);
    }

    return 0;
}

static int pci_ivshmem_init(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
    uint8_t *pci_conf;

    if (s->sizearg == NULL) {
        s->ivshmem_size = 4 << 20; /* 4 MB default */
    } else {
        s->ivshmem_size = ivshmem_get_size(s);
    }

    register_savevm(&s->dev.qdev, "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
                                                                        dev);

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        fprintf(stderr, "ivshmem: ioeventfd/irqfd requires MSI\n");
        exit(1);
    }

    /* check that role is reasonable */
    if (s->role) {
        if (strncmp(s->role, "peer", 5) == 0) {
            s->role_val = IVSHMEM_PEER;
        } else if (strncmp(s->role, "master", 7) == 0) {
            s->role_val = IVSHMEM_MASTER;
        } else {
            fprintf(stderr, "ivshmem: 'role' must be 'peer' or 'master'\n");
            exit(1);
        }
    } else {
        s->role_val = IVSHMEM_MASTER; /* default */
    }

    if (s->role_val == IVSHMEM_PEER) {
        error_set(&s->migration_blocker, QERR_DEVICE_FEATURE_BLOCKS_MIGRATION,
                  "ivshmem", "peer mode");
        migrate_add_blocker(s->migration_blocker);
    }

    pci_conf = s->dev.config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    pci_config_set_interrupt_pin(pci_conf, 1);

    s->shm_fd = 0;
    /* no peers yet; stops setup_ioeventfds() and doorbell writes from
     * touching the unallocated peers array */
    s->max_peer = -1;

    memory_region_init_io(&s->ivshmem_mmio, &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        setup_ioeventfds(s);
    }

    /* region for registers */
    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    memory_region_init(&s->bar, "ivshmem-bar2-container", s->ivshmem_size);

    if ((s->server_chr != NULL) &&
                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
        /* if we get a UNIX socket as the parameter we will talk
         * to the ivshmem server to receive the memory region */

        if (s->shmobj != NULL) {
            fprintf(stderr, "WARNING: do not specify both 'chardev' "
                                                "and 'shm' with ivshmem\n");
        }

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                                                    s->server_chr->filename);

        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
            ivshmem_setup_msi(s);
        }

        /* we allocate enough space for 16 guests and grow as needed */
        s->nb_peers = 16;
        s->vm_id = -1;

        /* allocate/initialize space for interrupt handling */
        s->peers = g_malloc0(s->nb_peers * sizeof(Peer));

        pci_register_bar(&s->dev, 2,
                         PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);

        s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));

        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
                     ivshmem_event, s);
    } else {
        /* just map the file immediately, we're not using a server */
        int fd;

        if (s->shmobj == NULL) {
            fprintf(stderr, "Must specify 'chardev' or 'shm' to ivshmem\n");
            exit(1);
        }

        IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);

        /* try opening with O_EXCL; if that succeeds the object is new and
         * reads as zeros, so just set its length */
        if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
                        S_IRWXU|S_IRWXG|S_IRWXO)) >= 0) {
            /* set the file to the length of the PCI device's memory */
            if (ftruncate(fd, s->ivshmem_size) != 0) {
                fprintf(stderr, "ivshmem: could not truncate shared file\n");
            }

        } else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                        S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
            fprintf(stderr, "ivshmem: could not open shared file\n");
            exit(-1);

        }

        if (check_shm_size(s, fd) == -1) {
            exit(-1);
        }

        create_shared_memory_BAR(s, fd);

    }

    return 0;
}

static int pci_ivshmem_uninit(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);

    if (s->migration_blocker) {
        migrate_del_blocker(s->migration_blocker);
        error_free(s->migration_blocker);
    }

    memory_region_destroy(&s->ivshmem_mmio);
    memory_region_del_subregion(&s->bar, &s->ivshmem);
    vmstate_unregister_ram(&s->ivshmem, &s->dev.qdev);
    memory_region_destroy(&s->ivshmem);
    memory_region_destroy(&s->bar);
    unregister_savevm(&dev->qdev, "ivshmem", s);

    return 0;
}

static PCIDeviceInfo ivshmem_info = {
    .qdev.name  = "ivshmem",
    .qdev.size  = sizeof(IVShmemState),
    .qdev.reset = ivshmem_reset,
    .init       = pci_ivshmem_init,
    .exit       = pci_ivshmem_uninit,
    .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
    .device_id  = 0x1110,
    .class_id   = PCI_CLASS_MEMORY_RAM,
    .qdev.props = (Property[]) {
        DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
        DEFINE_PROP_STRING("size", IVShmemState, sizearg),
        DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
        DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false),
        DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
        DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
        DEFINE_PROP_STRING("role", IVShmemState, role),
        DEFINE_PROP_END_OF_LIST(),
    }
};

static void ivshmem_register_devices(void)
{
    pci_qdev_register(&ivshmem_info);
}

device_init(ivshmem_register_devices)