Statistics
| Branch: | Revision:

root / hw / ivshmem.c @ 7e0a9247

History | View | Annotate | Download (22.5 kB)

1
/*
2
 * Inter-VM Shared Memory PCI device.
3
 *
4
 * Author:
5
 *      Cam Macdonell <cam@cs.ualberta.ca>
6
 *
7
 * Based On: cirrus_vga.c
8
 *          Copyright (c) 2004 Fabrice Bellard
9
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
10
 *
11
 *      and rtl8139.c
12
 *          Copyright (c) 2006 Igor Kovalenko
13
 *
14
 * This code is licensed under the GNU GPL v2.
15
 *
16
 * Contributions after 2012-01-13 are licensed under the terms of the
17
 * GNU GPL, version 2 or (at your option) any later version.
18
 */
19
#include "hw.h"
20
#include "pc.h"
21
#include "pci.h"
22
#include "msix.h"
23
#include "kvm.h"
24
#include "migration.h"
25
#include "qerror.h"
26
#include "event_notifier.h"
27

    
28
#include <sys/mman.h>
29
#include <sys/types.h>
30

    
31
/* Bit positions inside IVShmemState.features (see ivshmem_has_feature()). */
#define IVSHMEM_IOEVENTFD    0
#define IVSHMEM_MSI          1

/* Values stored in IVShmemState.role_val. */
#define IVSHMEM_PEER         0
#define IVSHMEM_MASTER       1

/* Size in bytes of the register BAR (BAR 0). */
#define IVSHMEM_REG_BAR_SIZE 0x100
38

    
39
/* Uncomment the next line to get debug traces on stdout. */
//#define DEBUG_IVSHMEM
#ifdef DEBUG_IVSHMEM
#define IVSHMEM_DPRINTF(fmt, ...)        \
    do {printf("IVSHMEM: " fmt, ## __VA_ARGS__); } while (0)
#else
/* Expand to a real (empty) statement so that the macro behaves the same in
 * both configurations when used as the sole body of an if/else. */
#define IVSHMEM_DPRINTF(fmt, ...) do { } while (0)
#endif
46

    
47
typedef struct Peer {
48
    int nb_eventfds;
49
    EventNotifier *eventfds;
50
} Peer;
51

    
52
typedef struct EventfdEntry {
53
    PCIDevice *pdev;
54
    int vector;
55
} EventfdEntry;
56

    
57
typedef struct IVShmemState {
58
    PCIDevice dev;
59
    uint32_t intrmask;
60
    uint32_t intrstatus;
61
    uint32_t doorbell;
62

    
63
    CharDriverState **eventfd_chr;
64
    CharDriverState *server_chr;
65
    MemoryRegion ivshmem_mmio;
66

    
67
    /* We might need to register the BAR before we actually have the memory.
68
     * So prepare a container MemoryRegion for the BAR immediately and
69
     * add a subregion when we have the memory.
70
     */
71
    MemoryRegion bar;
72
    MemoryRegion ivshmem;
73
    uint64_t ivshmem_size; /* size of shared memory region */
74
    uint32_t ivshmem_attr;
75
    uint32_t ivshmem_64bit;
76
    int shm_fd; /* shared memory file descriptor */
77

    
78
    Peer *peers;
79
    int nb_peers; /* how many guests we have space for */
80
    int max_peer; /* maximum numbered peer */
81

    
82
    int vm_id;
83
    uint32_t vectors;
84
    uint32_t features;
85
    EventfdEntry *eventfd_table;
86

    
87
    Error *migration_blocker;
88

    
89
    char * shmobj;
90
    char * sizearg;
91
    char * role;
92
    int role_val;   /* scalar to avoid multiple string comparisons */
93
} IVShmemState;
94

    
95
/* Byte offsets of the device registers inside the register BAR. */
enum ivshmem_registers {
    INTRMASK   = 0x00,
    INTRSTATUS = 0x04,
    IVPOSITION = 0x08,
    DOORBELL   = 0x0c,
};
102

    
103
/*
 * Test one feature bit of the device.
 *
 * @ivs:     device state
 * @feature: bit index (IVSHMEM_IOEVENTFD or IVSHMEM_MSI)
 *
 * Returns a non-zero value when the bit is set in ivs->features, 0 otherwise.
 */
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature)
{
    /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
    return ivs->features & (1u << feature);
}
107

    
108
/*
 * Returns true when x has exactly one bit set.
 * Zero is not a power of two and yields false.
 */
static inline bool is_power_of_two(uint64_t x)
{
    return x != 0 && (x & (x - 1)) == 0;
}
111

    
112
/* accessing registers - based on rtl8139 */
113
static void ivshmem_update_irq(IVShmemState *s, int val)
114
{
115
    int isr;
116
    isr = (s->intrstatus & s->intrmask) & 0xffffffff;
117

    
118
    /* don't print ISR resets */
119
    if (isr) {
120
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
121
           isr ? 1 : 0, s->intrstatus, s->intrmask);
122
    }
123

    
124
    qemu_set_irq(s->dev.irq[0], (isr != 0));
125
}
126

    
127
/* Latch a new interrupt mask and re-evaluate the interrupt line. */
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
    s->intrmask = val;
    ivshmem_update_irq(s, val);
}
135

    
136
/* Return the current interrupt mask; reading has no side effect. */
static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t mask = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", mask);
    return mask;
}
144

    
145
/* Overwrite the interrupt status register and re-evaluate the interrupt line. */
static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
    s->intrstatus = val;
    ivshmem_update_irq(s, val);
}
153

    
154
/*
 * Return the interrupt status as it was before the read.
 * The register is read-to-clear: it is zeroed and the interrupt line is
 * re-evaluated before returning.
 */
static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t previous = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    ivshmem_update_irq(s, 0);

    return previous;
}
165

    
166
/*
 * MMIO write handler for the register BAR.
 *
 * @opaque: the IVShmemState
 * @addr:   offset inside the BAR; masked with 0xfc before decoding
 * @val:    value written; for DOORBELL the destination peer is taken from
 *          bits 16 and up and the vector from the low 8 bits
 * @size:   access size (unused)
 *
 * Writes to offsets other than INTRMASK, INTRSTATUS and DOORBELL are ignored.
 */
static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* The peer table is only allocated when a server chardev is in
             * use; without it there is nobody to notify, and peers[] must
             * not be dereferenced. */
            if (s->peers == NULL) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check that dest VM ID is reasonable */
            if (dest > s->max_peer) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            }
            break;
        default:
            IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
            break;
    }
}
204

    
205
/*
 * MMIO read handler for the register BAR.
 *
 * INTRMASK and INTRSTATUS return the corresponding register (reading
 * INTRSTATUS also clears it). IVPOSITION returns our VM id once the shared
 * memory is mapped (shm_fd > 0) and -1 otherwise. Any other offset reads
 * as 0. The result is computed as a 32-bit value.
 */
static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
                                unsigned size)
{
    IVShmemState *s = opaque;
    uint32_t value;

    if (addr == INTRMASK) {
        value = ivshmem_IntrMask_read(s);
    } else if (addr == INTRSTATUS) {
        value = ivshmem_IntrStatus_read(s);
    } else if (addr == IVPOSITION) {
        /* return my VM ID if the memory is mapped */
        value = (s->shm_fd > 0) ? s->vm_id : -1;
    } else {
        IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
        value = 0;
    }

    return value;
}
238

    
239
static const MemoryRegionOps ivshmem_mmio_ops = {
240
    .read = ivshmem_io_read,
241
    .write = ivshmem_io_write,
242
    .endianness = DEVICE_NATIVE_ENDIAN,
243
    .impl = {
244
        .min_access_size = 4,
245
        .max_access_size = 4,
246
    },
247
};
248

    
249
static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
250
{
251
    IVShmemState *s = opaque;
252

    
253
    ivshmem_IntrStatus_write(s, *buf);
254

    
255
    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
256
}
257

    
258
/* Char device "can read" callback: always reports room for 8 bytes. */
static int ivshmem_can_receive(void * opaque)
{
    const int max_bytes = 8;

    return max_bytes;
}
262

    
263
/* Char device event callback: events are only traced, never acted upon. */
static void ivshmem_event(void *opaque, int event)
{
    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
}
267

    
268
static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
269

    
270
    EventfdEntry *entry = opaque;
271
    PCIDevice *pdev = entry->pdev;
272

    
273
    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
274
    msix_notify(pdev, entry->vector);
275
}
276

    
277
/*
 * Wrap the eventfd behind notifier 'n' in a character device and install
 * the interrupt handlers on it.
 *
 * @opaque: the IVShmemState
 * @n:      notifier whose file descriptor is wrapped
 * @vector: vector this eventfd stands for (index into eventfd_table)
 *
 * Returns the new character device. The process exits if the device
 * cannot be created.
 */
static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n,
                                                  int vector)
{
    IVShmemState *s = opaque;
    EventfdEntry *entry;
    int fd = event_notifier_get_fd(n);
    CharDriverState *chr = qemu_chr_open_eventfd(fd);

    if (!chr) {
        fprintf(stderr, "creating eventfd for eventfd %d failed\n", fd);
        exit(-1);
    }

    if (!ivshmem_has_feature(s, IVSHMEM_MSI)) {
        /* single legacy interrupt: the handler works on the device state */
        qemu_chr_add_handlers(chr, ivshmem_can_receive, ivshmem_receive,
                              ivshmem_event, s);
        return chr;
    }

    /* if MSI is supported we need multiple interrupts */
    entry = &s->eventfd_table[vector];
    entry->pdev = &s->dev;
    entry->vector = vector;

    qemu_chr_add_handlers(chr, ivshmem_can_receive, fake_irqfd,
                          ivshmem_event, entry);

    return chr;
}
307

    
308
/*
 * Check that the guest isn't going to try and map more memory than the
 * shared object behind 'fd' has allocated.
 *
 * Returns 0 when the object is large enough, -1 (after printing a message)
 * when it is too small or when its size cannot be determined.
 */
static int check_shm_size(IVShmemState *s, int fd)
{
    struct stat buf;

    /* without a successful fstat() the size below would be garbage */
    if (fstat(fd, &buf) < 0) {
        fprintf(stderr, "IVSHMEM ERROR: could not stat shared object: %s\n",
                strerror(errno));
        return -1;
    }

    if (s->ivshmem_size > (uint64_t)buf.st_size) {
        fprintf(stderr,
                "IVSHMEM ERROR: Requested memory size greater"
                " than shared object size (%" PRIu64 " > %" PRIu64")\n",
                s->ivshmem_size, (uint64_t)buf.st_size);
        return -1;
    }

    return 0;
}
326

    
327
/* create the shared memory BAR when we are not using the server, so we can
328
 * create the BAR and map the memory immediately */
329
static void create_shared_memory_BAR(IVShmemState *s, int fd) {
330

    
331
    void * ptr;
332

    
333
    s->shm_fd = fd;
334

    
335
    ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
336

    
337
    memory_region_init_ram_ptr(&s->ivshmem, "ivshmem.bar2",
338
                               s->ivshmem_size, ptr);
339
    vmstate_register_ram(&s->ivshmem, &s->dev.qdev);
340
    memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
341

    
342
    /* region for shared memory */
343
    pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);
344
}
345

    
346
/*
 * Register eventfd 'i' of peer 'posn' on the DOORBELL register: a 4-byte
 * write of the value (posn << 16) | i is matched to that notifier.
 */
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    int doorbell_value = (posn << 16) | i;
    EventNotifier *notifier = &s->peers[posn].eventfds[i];

    memory_region_add_eventfd(&s->ivshmem_mmio, DOORBELL, 4, true,
                              doorbell_value, notifier);
}
355

    
356
/*
 * Undo ivshmem_add_eventfd(): remove the DOORBELL match for eventfd 'i'
 * of peer 'posn'.
 */
static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    int doorbell_value = (posn << 16) | i;
    EventNotifier *notifier = &s->peers[posn].eventfds[i];

    memory_region_del_eventfd(&s->ivshmem_mmio, DOORBELL, 4, true,
                              doorbell_value, notifier);
}
365

    
366
/*
 * Release every eventfd recorded for peer 'posn' and mark the slot empty.
 *
 * The DOORBELL matches are only removed when the ioeventfd feature is on
 * (they are only installed in that case), but the notifiers themselves are
 * always cleaned up and freed. Otherwise a departed peer would keep a stale
 * eventfd count, and the next guest reusing the slot would append past the
 * end of the array.
 */
static void close_guest_eventfds(IVShmemState *s, int posn)
{
    int i, guest_curr_max;

    guest_curr_max = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        memory_region_transaction_begin();
        for (i = 0; i < guest_curr_max; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < guest_curr_max; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    /* don't leave a dangling pointer: a repeated call would free it again */
    s->peers[posn].eventfds = NULL;
    s->peers[posn].nb_eventfds = 0;
}
388

    
389
/* this function increase the dynamic storage need to store data about other
390
 * guests */
391
static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
392

    
393
    int j, old_nb_alloc;
394

    
395
    old_nb_alloc = s->nb_peers;
396

    
397
    while (new_min_size >= s->nb_peers)
398
        s->nb_peers = s->nb_peers * 2;
399

    
400
    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);
401
    s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));
402

    
403
    /* zero out new pointers */
404
    for (j = old_nb_alloc; j < s->nb_peers; j++) {
405
        s->peers[j].eventfds = NULL;
406
        s->peers[j].nb_eventfds = 0;
407
    }
408
}
409

    
410
static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
411
{
412
    IVShmemState *s = opaque;
413
    int incoming_fd, tmp_fd;
414
    int guest_max_eventfd;
415
    long incoming_posn;
416

    
417
    memcpy(&incoming_posn, buf, sizeof(long));
418
    /* pick off s->server_chr->msgfd and store it, posn should accompany msg */
419
    tmp_fd = qemu_chr_fe_get_msgfd(s->server_chr);
420
    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
421

    
422
    /* make sure we have enough space for this guest */
423
    if (incoming_posn >= s->nb_peers) {
424
        increase_dynamic_storage(s, incoming_posn);
425
    }
426

    
427
    if (tmp_fd == -1) {
428
        /* if posn is positive and unseen before then this is our posn*/
429
        if ((incoming_posn >= 0) &&
430
                            (s->peers[incoming_posn].eventfds == NULL)) {
431
            /* receive our posn */
432
            s->vm_id = incoming_posn;
433
            return;
434
        } else {
435
            /* otherwise an fd == -1 means an existing guest has gone away */
436
            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
437
            close_guest_eventfds(s, incoming_posn);
438
            return;
439
        }
440
    }
441

    
442
    /* because of the implementation of get_msgfd, we need a dup */
443
    incoming_fd = dup(tmp_fd);
444

    
445
    if (incoming_fd == -1) {
446
        fprintf(stderr, "could not allocate file descriptor %s\n",
447
                                                            strerror(errno));
448
        return;
449
    }
450

    
451
    /* if the position is -1, then it's shared memory region fd */
452
    if (incoming_posn == -1) {
453

    
454
        void * map_ptr;
455

    
456
        s->max_peer = 0;
457

    
458
        if (check_shm_size(s, incoming_fd) == -1) {
459
            exit(-1);
460
        }
461

    
462
        /* mmap the region and map into the BAR2 */
463
        map_ptr = mmap(0, s->ivshmem_size, PROT_READ|PROT_WRITE, MAP_SHARED,
464
                                                            incoming_fd, 0);
465
        memory_region_init_ram_ptr(&s->ivshmem,
466
                                   "ivshmem.bar2", s->ivshmem_size, map_ptr);
467
        vmstate_register_ram(&s->ivshmem, &s->dev.qdev);
468

    
469
        IVSHMEM_DPRINTF("guest h/w addr = %" PRIu64 ", size = %" PRIu64 "\n",
470
                         s->ivshmem_offset, s->ivshmem_size);
471

    
472
        memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
473

    
474
        /* only store the fd if it is successfully mapped */
475
        s->shm_fd = incoming_fd;
476

    
477
        return;
478
    }
479

    
480
    /* each guest has an array of eventfds, and we keep track of how many
481
     * guests for each VM */
482
    guest_max_eventfd = s->peers[incoming_posn].nb_eventfds;
483

    
484
    if (guest_max_eventfd == 0) {
485
        /* one eventfd per MSI vector */
486
        s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors);
487
    }
488

    
489
    /* this is an eventfd for a particular guest VM */
490
    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
491
                                            guest_max_eventfd, incoming_fd);
492
    event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd],
493
                           incoming_fd);
494

    
495
    /* increment count for particular guest */
496
    s->peers[incoming_posn].nb_eventfds++;
497

    
498
    /* keep track of the maximum VM ID */
499
    if (incoming_posn > s->max_peer) {
500
        s->max_peer = incoming_posn;
501
    }
502

    
503
    if (incoming_posn == s->vm_id) {
504
        s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
505
                   &s->peers[s->vm_id].eventfds[guest_max_eventfd],
506
                   guest_max_eventfd);
507
    }
508

    
509
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
510
        ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
511
    }
512
}
513

    
514
/* Select the MSI-X vectors used by device.
515
 * ivshmem maps events to vectors statically, so
516
 * we just enable all vectors on init and after reset. */
517
static void ivshmem_use_msix(IVShmemState * s)
518
{
519
    int i;
520

    
521
    if (!msix_present(&s->dev)) {
522
        return;
523
    }
524

    
525
    for (i = 0; i < s->vectors; i++) {
526
        msix_vector_use(&s->dev, i);
527
    }
528
}
529

    
530
/* Device reset: clear pending interrupt status and mark every MSI-X vector
 * as in use again. */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *state = DO_UPCAST(IVShmemState, dev.qdev, d);

    state->intrstatus = 0;
    ivshmem_use_msix(state);
}
537

    
538
/*
 * Parse the "size" property into a byte count.
 *
 * The string is a decimal number followed by an optional unit: no unit or
 * 'M'/'m' means megabytes, 'G'/'g' means gigabytes. The resulting size must
 * be a non-zero power of two because it becomes the size of a PCI BAR.
 *
 * Returns the size in bytes. On any invalid input a message is printed and
 * the process exits.
 */
static uint64_t ivshmem_get_size(IVShmemState * s)
{
    uint64_t value;
    unsigned int shift;
    char *ptr;

    errno = 0;
    value = strtoull(s->sizearg, &ptr, 10);
    /* no digits at all, or a number that does not fit */
    if (ptr == s->sizearg || errno == ERANGE) {
        fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
        exit(1);
    }

    switch (*ptr) {
        case 0: case 'M': case 'm':
            shift = 20;
            break;
        case 'G': case 'g':
            shift = 30;
            break;
        default:
            fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
            exit(1);
    }

    /* the shift must not silently drop high bits */
    if (value > (UINT64_MAX >> shift)) {
        fprintf(stderr, "qemu: invalid ram size: %s\n", s->sizearg);
        exit(1);
    }
    value <<= shift;

    /* BARs must be a power of 2 */
    if (value == 0 || !is_power_of_two(value)) {
        fprintf(stderr, "ivshmem: size must be power of 2\n");
        exit(1);
    }

    return value;
}
564

    
565
/*
 * Set up MSI-X with s->vectors vectors in an exclusive BAR (BAR 1),
 * allocate the per-vector EventfdEntry table and mark all vectors as used.
 * The process exits with a message if MSI-X cannot be initialized.
 */
static void ivshmem_setup_msi(IVShmemState * s)
{
    if (msix_init_exclusive_bar(&s->dev, s->vectors, 1)) {
        /* report unconditionally: the debug macro is normally compiled out
         * and the process would otherwise die without any explanation */
        fprintf(stderr, "ivshmem: msix initialization failed\n");
        exit(1);
    }

    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* allocate QEMU char devices for receiving interrupts */
    s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));

    ivshmem_use_msix(s);
}
579

    
580
/*
 * Migration save handler: writes the PCI device state followed by either
 * the MSI-X state (MSI mode) or the interrupt status and mask registers.
 */
static void ivshmem_save(QEMUFile* f, void *opaque)
{
    IVShmemState *proxy = opaque;

    IVSHMEM_DPRINTF("ivshmem_save\n");
    pci_device_save(&proxy->dev, f);

    if (!ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
        qemu_put_be32(f, proxy->intrstatus);
        qemu_put_be32(f, proxy->intrmask);
        return;
    }

    msix_save(&proxy->dev, f);
}
595

    
596
/*
 * Migration load handler, the counterpart of ivshmem_save().
 *
 * Returns 0 on success, -EINVAL for an unknown stream version or when the
 * device runs in the 'peer' role, or the error of pci_device_load().
 */
static int ivshmem_load(QEMUFile* f, void *opaque, int version_id)
{
    IVShmemState *proxy = opaque;
    int rc;

    IVSHMEM_DPRINTF("ivshmem_load\n");

    if (version_id > 0) {
        return -EINVAL;
    }

    if (proxy->role_val == IVSHMEM_PEER) {
        fprintf(stderr, "ivshmem: 'peer' devices are not migratable\n");
        return -EINVAL;
    }

    rc = pci_device_load(&proxy->dev, f);
    if (rc != 0) {
        return rc;
    }

    if (!ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
        /* same order as written by ivshmem_save() */
        proxy->intrstatus = qemu_get_be32(f);
        proxy->intrmask = qemu_get_be32(f);
        return 0;
    }

    msix_load(&proxy->dev, f);
    ivshmem_use_msix(proxy);

    return 0;
}
627

    
628
/* PCI config space write hook: apply the default handling first, then let
 * the MSI-X code see the same write. */
static void ivshmem_write_config(PCIDevice *pci_dev, uint32_t address,
                                 uint32_t val, int len)
{
    pci_default_write_config(pci_dev, address, val, len);
    msix_write_config(pci_dev, address, val, len);
}
634

    
635
/*
 * Device init callback.
 *
 * Validates the properties, registers the migration handlers, sets up the
 * register BAR (BAR 0) and the shared memory BAR (BAR 2). With a "unix:"
 * chardev the memory and eventfds arrive later through ivshmem_read();
 * otherwise the POSIX shared memory object named by the 'shm' property is
 * opened (and created if needed) and mapped immediately.
 *
 * Returns 0. Configuration and setup errors terminate the process.
 */
static int pci_ivshmem_init(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
    uint8_t *pci_conf;

    if (s->sizearg == NULL) {
        s->ivshmem_size = 4 << 20; /* 4 MB default */
    } else {
        s->ivshmem_size = ivshmem_get_size(s);
    }

    register_savevm(&s->dev.qdev, "ivshmem", 0, 0, ivshmem_save, ivshmem_load,
                                                                        dev);

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        fprintf(stderr, "ivshmem: ioeventfd/irqfd requires MSI\n");
        exit(1);
    }

    /* check that role is reasonable */
    if (s->role) {
        if (strncmp(s->role, "peer", 5) == 0) {
            s->role_val = IVSHMEM_PEER;
        } else if (strncmp(s->role, "master", 7) == 0) {
            s->role_val = IVSHMEM_MASTER;
        } else {
            fprintf(stderr, "ivshmem: 'role' must be 'peer' or 'master'\n");
            exit(1);
        }
    } else {
        s->role_val = IVSHMEM_MASTER; /* default */
    }

    if (s->role_val == IVSHMEM_PEER) {
        error_set(&s->migration_blocker, QERR_DEVICE_FEATURE_BLOCKS_MIGRATION,
                  "peer mode", "ivshmem");
        migrate_add_blocker(s->migration_blocker);
    }

    pci_conf = s->dev.config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    pci_config_set_interrupt_pin(pci_conf, 1);

    /* 0 means "shared memory not mapped yet" (see the IVPOSITION read) */
    s->shm_fd = 0;

    memory_region_init_io(&s->ivshmem_mmio, &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers*/
    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    memory_region_init(&s->bar, "ivshmem-bar2-container", s->ivshmem_size);
    s->ivshmem_attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
        PCI_BASE_ADDRESS_MEM_PREFETCH;
    if (s->ivshmem_64bit) {
        s->ivshmem_attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
    }

    if ((s->server_chr != NULL) &&
                        (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
        /* if we get a UNIX socket as the parameter we will talk
         * to the ivshmem server to receive the memory region */

        if (s->shmobj != NULL) {
            fprintf(stderr, "WARNING: do not specify both 'chardev' "
                                                "and 'shm' with ivshmem\n");
        }

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                                                    s->server_chr->filename);

        if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
            ivshmem_setup_msi(s);
        }

        /* we allocate enough space for 16 guests and grow as needed */
        s->nb_peers = 16;
        s->vm_id = -1;

        /* allocate/initialize space for interrupt handling */
        s->peers = g_malloc0(s->nb_peers * sizeof(Peer));

        pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);

        s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));

        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, ivshmem_read,
                     ivshmem_event, s);
    } else {
        /* just map the file immediately, we're not using a server */
        int fd;

        if (s->shmobj == NULL) {
            fprintf(stderr, "Must specify 'chardev' or 'shm' to ivshmem\n");
            /* there is nothing to open; shm_open(NULL) must not be reached */
            exit(1);
        }

        IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);

        /* try opening with O_EXCL and if it succeeds zero the memory
         * by truncating to 0 */
        fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
                      S_IRWXU|S_IRWXG|S_IRWXO);
        if (fd > 0) {
            /* truncate file to length PCI device's memory */
            if (ftruncate(fd, s->ivshmem_size) != 0) {
                /* not fatal here: a too small object is rejected by
                 * check_shm_size() below */
                fprintf(stderr, "ivshmem: could not truncate shared file\n");
            }
        } else {
            /* the object already exists (or creation failed): open it */
            fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
                          S_IRWXU|S_IRWXG|S_IRWXO);
            if (fd < 0) {
                fprintf(stderr, "ivshmem: could not open shared file\n");
                exit(-1);
            }
        }

        if (check_shm_size(s, fd) == -1) {
            exit(-1);
        }

        create_shared_memory_BAR(s, fd);
    }

    s->dev.config_write = ivshmem_write_config;

    return 0;
}
765

    
766
/*
 * Device exit callback: drops the migration blocker (if one was installed),
 * tears down the memory regions and unregisters the migration handlers.
 */
static void pci_ivshmem_uninit(PCIDevice *dev)
{
    IVShmemState *s = DO_UPCAST(IVShmemState, dev, dev);
    Error *blocker = s->migration_blocker;

    if (blocker != NULL) {
        migrate_del_blocker(blocker);
        error_free(blocker);
    }

    /* register region */
    memory_region_destroy(&s->ivshmem_mmio);

    /* shared memory: detach from the container before destroying it */
    memory_region_del_subregion(&s->bar, &s->ivshmem);
    vmstate_unregister_ram(&s->ivshmem, &s->dev.qdev);
    memory_region_destroy(&s->ivshmem);
    memory_region_destroy(&s->bar);

    unregister_savevm(&dev->qdev, "ivshmem", s);
}
782

    
783
static Property ivshmem_properties[] = {
784
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
785
    DEFINE_PROP_STRING("size", IVShmemState, sizearg),
786
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
787
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, false),
788
    DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
789
    DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
790
    DEFINE_PROP_STRING("role", IVShmemState, role),
791
    DEFINE_PROP_UINT32("use64", IVShmemState, ivshmem_64bit, 1),
792
    DEFINE_PROP_END_OF_LIST(),
793
};
794

    
795
/* Class initializer: fills in the PCI identity, the init/exit callbacks and
 * the generic device hooks (reset handler and property list). */
static void ivshmem_class_init(ObjectClass *klass, void *data)
{
    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
    DeviceClass *dev_class = DEVICE_CLASS(klass);

    /* PCI identity */
    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
    pci_class->device_id = 0x1110;
    pci_class->class_id = PCI_CLASS_MEMORY_RAM;

    /* life cycle */
    pci_class->init = pci_ivshmem_init;
    pci_class->exit = pci_ivshmem_uninit;

    dev_class->reset = ivshmem_reset;
    dev_class->props = ivshmem_properties;
}
808

    
809
static TypeInfo ivshmem_info = {
810
    .name          = "ivshmem",
811
    .parent        = TYPE_PCI_DEVICE,
812
    .instance_size = sizeof(IVShmemState),
813
    .class_init    = ivshmem_class_init,
814
};
815

    
816
static void ivshmem_register_types(void)
817
{
818
    type_register_static(&ivshmem_info);
819
}
820

    
821
type_init(ivshmem_register_types)