Statistics
| Branch: | Revision:

root / hw / vfio_pci.c @ fa2ddcb4

History | View | Annotate | Download (54.6 kB)

1
/*
2
 * vfio based device assignment support
3
 *
4
 * Copyright Red Hat, Inc. 2012
5
 *
6
 * Authors:
7
 *  Alex Williamson <alex.williamson@redhat.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 * Based on qemu-kvm device-assignment:
13
 *  Adapted for KVM by Qumranet.
14
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
15
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
16
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
17
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
18
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
19
 */
20

    
21
#include <dirent.h>
22
#include <unistd.h>
23
#include <sys/ioctl.h>
24
#include <sys/mman.h>
25
#include <sys/stat.h>
26
#include <sys/types.h>
27
#include <linux/vfio.h>
28

    
29
#include "config.h"
30
#include "event_notifier.h"
31
#include "exec-memory.h"
32
#include "kvm.h"
33
#include "memory.h"
34
#include "msi.h"
35
#include "msix.h"
36
#include "qemu-error.h"
37
#include "range.h"
38
#include "vfio_pci_int.h"
39

    
40
/* #define DEBUG_VFIO */
41
#ifdef DEBUG_VFIO
42
#define DPRINTF(fmt, ...) \
43
    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
44
#else
45
#define DPRINTF(fmt, ...) \
46
    do { } while (0)
47
#endif
48

    
49
#define MSIX_CAP_LENGTH 12
50

    
51
static QLIST_HEAD(, VFIOContainer)
52
    container_list = QLIST_HEAD_INITIALIZER(container_list);
53

    
54
static QLIST_HEAD(, VFIOGroup)
55
    group_list = QLIST_HEAD_INITIALIZER(group_list);
56

    
57
static void vfio_disable_interrupts(VFIODevice *vdev);
58
static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
59
static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
60

    
61
/*
62
 * Common VFIO interrupt disable
63
 */
64
static void vfio_disable_irqindex(VFIODevice *vdev, int index)
{
    /*
     * A DATA_NONE/ACTION_TRIGGER request with count = 0 asks the kernel
     * to tear down every trigger registered for this IRQ index.
     */
    struct vfio_irq_set teardown = {
        .argsz = sizeof(teardown),
        .index = index,
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .start = 0,
        .count = 0,
    };

    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &teardown);

    vdev->interrupt = VFIO_INT_NONE;
}
78

    
79
/*
80
 * INTx
81
 */
82
/* Re-arm the physical INTx line so the device can assert again. */
static void vfio_unmask_intx(VFIODevice *vdev)
{
    struct vfio_irq_set unmask = {
        .argsz = sizeof(unmask),
        .index = VFIO_PCI_INTX_IRQ_INDEX,
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .start = 0,
        .count = 1,
    };

    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &unmask);
}
94

    
95
static void vfio_intx_interrupt(void *opaque)
96
{
97
    VFIODevice *vdev = opaque;
98

    
99
    if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) {
100
        return;
101
    }
102

    
103
    DPRINTF("%s(%04x:%02x:%02x.%x) Pin %c\n", __func__, vdev->host.domain,
104
            vdev->host.bus, vdev->host.slot, vdev->host.function,
105
            'A' + vdev->intx.pin);
106

    
107
    vdev->intx.pending = true;
108
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
109
}
110

    
111
/* Emulated EOI: deassert the guest IRQ and unmask the host interrupt. */
static void vfio_eoi(VFIODevice *vdev)
{
    if (vdev->intx.pending) {
        DPRINTF("%s(%04x:%02x:%02x.%x) EOI\n", __func__, vdev->host.domain,
                vdev->host.bus, vdev->host.slot, vdev->host.function);

        vdev->intx.pending = false;
        qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
        vfio_unmask_intx(vdev);
    }
}
124

    
125
/*
 * vfio_irq_set header followed by a single eventfd, for the common
 * case of wiring/unwiring one interrupt.  QEMU_PACKED keeps the fd
 * immediately after the header, as the kernel's variable-length
 * data[] ABI expects.
 */
typedef struct QEMU_PACKED VFIOIRQSetFD {
    struct vfio_irq_set irq_set;
    int32_t fd;
} VFIOIRQSetFD;
129

    
130
/*
 * Route the device's INTx line through an eventfd handled in QEMU.
 * Returns 0 on success (including when INTx is disabled or the device
 * has no pin), negative errno on failure.
 */
static int vfio_enable_intx(VFIODevice *vdev)
{
    VFIOIRQSetFD irq_set_fd = {
        .irq_set = {
            .argsz = sizeof(irq_set_fd),
            .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
            .index = VFIO_PCI_INTX_IRQ_INDEX,
            .start = 0,
            .count = 1,
        },
    };
    uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
    int ret;

    /* Nothing to do if INTx is administratively off or the device has no pin */
    if (vdev->intx.disabled || !pin) {
        return 0;
    }

    vfio_disable_interrupts(vdev);

    vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
    ret = event_notifier_init(&vdev->intx.interrupt, 0);
    if (ret) {
        error_report("vfio: Error: event_notifier_init failed\n");
        return ret;
    }

    irq_set_fd.fd = event_notifier_get_fd(&vdev->intx.interrupt);
    qemu_set_fd_handler(irq_set_fd.fd, vfio_intx_interrupt, NULL, vdev);

    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd)) {
        ret = -errno; /* capture errno before cleanup calls can clobber it */
        error_report("vfio: Error: Failed to setup INTx fd: %m\n");
        /* Undo the handler and notifier so we don't leak them on failure */
        qemu_set_fd_handler(irq_set_fd.fd, NULL, NULL, vdev);
        event_notifier_cleanup(&vdev->intx.interrupt);
        return ret;
    }

    /*
     * Disable mmaps so we can trap on BAR accesses.  We interpret any
     * access as a response to an interrupt and unmask the physical
     * device.  The device will re-assert if the interrupt is still
     * pending.  We'll likely retrigger on the host multiple times per
     * guest interrupt, but without EOI notification it's better than
     * nothing.  Acceleration paths through KVM will avoid this.
     */
    vfio_mmap_set_enabled(vdev, false);

    vdev->interrupt = VFIO_INT_INTx;

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);

    return 0;
}
182

    
183
/* Tear down INTx: stop triggers, drop any pending IRQ, restore mmaps. */
static void vfio_disable_intx(VFIODevice *vdev)
{
    int fd = event_notifier_get_fd(&vdev->intx.interrupt);

    vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
    vdev->intx.pending = false;
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
    vfio_mmap_set_enabled(vdev, true);

    /* Unhook the eventfd from the main loop and destroy it */
    qemu_set_fd_handler(fd, NULL, NULL, vdev);
    event_notifier_cleanup(&vdev->intx.interrupt);

    vdev->interrupt = VFIO_INT_NONE;

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);
}
201

    
202
/*
203
 * MSI/X
204
 */
205
static void vfio_msi_interrupt(void *opaque)
206
{
207
    VFIOMSIVector *vector = opaque;
208
    VFIODevice *vdev = vector->vdev;
209
    int nr = vector - vdev->msi_vectors;
210

    
211
    if (!event_notifier_test_and_clear(&vector->interrupt)) {
212
        return;
213
    }
214

    
215
    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d\n", __func__,
216
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
217
            vdev->host.function, nr);
218

    
219
    if (vdev->interrupt == VFIO_INT_MSIX) {
220
        msix_notify(&vdev->pdev, nr);
221
    } else if (vdev->interrupt == VFIO_INT_MSI) {
222
        msi_notify(&vdev->pdev, nr);
223
    } else {
224
        error_report("vfio: MSI interrupt receieved, but not enabled?\n");
225
    }
226
}
227

    
228
/*
 * Program all currently-allocated MSI or MSI-X vectors into the kernel
 * in a single VFIO_DEVICE_SET_IRQS call.  Returns the ioctl result;
 * on success the device interrupt mode is updated.
 */
static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
{
    struct vfio_irq_set *irq_set;
    int32_t *fds;
    int i, argsz, ret;

    argsz = sizeof(*irq_set) + vdev->nr_vectors * sizeof(*fds);
    irq_set = g_malloc0(argsz);

    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = msix ? VFIO_PCI_MSIX_IRQ_INDEX : VFIO_PCI_MSI_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = vdev->nr_vectors;

    /* An fd of -1 leaves the corresponding vector untriggered */
    fds = (int32_t *)&irq_set->data;
    for (i = 0; i < vdev->nr_vectors; i++) {
        fds[i] = vdev->msi_vectors[i].use ?
                 event_notifier_get_fd(&vdev->msi_vectors[i].interrupt) : -1;
    }

    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);

    if (!ret) {
        vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
    }

    return ret;
}
263

    
264
/*
 * MSI-X vector-use callback: wire vector @nr of @pdev to the host.
 * Grows the enabled vector count on demand and prefers a KVM irqfd
 * route, falling back to userspace eventfd handling.  Always returns 0.
 */
static int vfio_msix_vector_use(PCIDevice *pdev,
                                unsigned int nr, MSIMessage msg)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOMSIVector *vector;
    int ret;

    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d used\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, nr);

    /* Entering MSI-X tears down whatever interrupt mode was active before */
    if (vdev->interrupt != VFIO_INT_MSIX) {
        vfio_disable_interrupts(vdev);
    }

    /* Lazily allocate per-vector state for the whole MSI-X table */
    if (!vdev->msi_vectors) {
        vdev->msi_vectors = g_malloc0(vdev->msix->entries *
                                      sizeof(VFIOMSIVector));
    }

    vector = &vdev->msi_vectors[nr];
    vector->vdev = vdev;
    vector->use = true;

    msix_vector_use(pdev, nr);

    if (event_notifier_init(&vector->interrupt, 0)) {
        error_report("vfio: Error: event_notifier_init failed\n");
    }

    /*
     * Attempt to enable route through KVM irqchip,
     * default to userspace handling if unavailable.
     */
    vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
    if (vector->virq < 0 ||
        kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                       vector->virq) < 0) {
        /* irqfd attach failed: release the route before falling back */
        if (vector->virq >= 0) {
            kvm_irqchip_release_virq(kvm_state, vector->virq);
            vector->virq = -1;
        }
        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            vfio_msi_interrupt, NULL, vector);
    }

    /*
     * We don't want to have the host allocate all possible MSI vectors
     * for a device if they're not in use, so we shutdown and incrementally
     * increase them as needed.
     */
    if (vdev->nr_vectors < nr + 1) {
        int i;

        /* Growing: disable the whole index and re-enable with the new count */
        vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
        vdev->nr_vectors = nr + 1;
        ret = vfio_enable_vectors(vdev, true);
        if (ret) {
            error_report("vfio: failed to enable vectors, %d\n", ret);
        }

        /* We don't know if we've missed interrupts in the interim... */
        for (i = 0; i < vdev->msix->entries; i++) {
            if (vdev->msi_vectors[i].use) {
                msix_notify(&vdev->pdev, i);
            }
        }
    } else {
        /* Vector already within the enabled range: just swap in its fd */
        VFIOIRQSetFD irq_set_fd = {
            .irq_set = {
                .argsz = sizeof(irq_set_fd),
                .flags = VFIO_IRQ_SET_DATA_EVENTFD |
                         VFIO_IRQ_SET_ACTION_TRIGGER,
                .index = VFIO_PCI_MSIX_IRQ_INDEX,
                .start = nr,
                .count = 1,
            },
            .fd = event_notifier_get_fd(&vector->interrupt),
        };
        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
        if (ret) {
            error_report("vfio: failed to modify vector, %d\n", ret);
        }

        /*
         * If we were connected to the hardware PBA we could skip this,
         * until then, a spurious interrupt is better than starvation.
         */
        msix_notify(&vdev->pdev, nr);
    }

    return 0;
}
357

    
358
/*
 * MSI-X vector-release callback: detach vector @nr from the host by
 * programming an fd of -1, then dismantle the irqfd route or the
 * userspace handler, whichever was in use.
 */
static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOMSIVector *vector = &vdev->msi_vectors[nr];
    /* fd = -1 tells the kernel to drop the trigger for this one vector */
    VFIOIRQSetFD irq_set_fd = {
        .irq_set = {
            .argsz = sizeof(irq_set_fd),
            .flags = VFIO_IRQ_SET_DATA_EVENTFD |
                     VFIO_IRQ_SET_ACTION_TRIGGER,
            .index = VFIO_PCI_MSIX_IRQ_INDEX,
            .start = nr,
            .count = 1,
        },
        .fd = -1,
    };

    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, nr);

    /*
     * XXX What's the right thing to do here?  This turns off the interrupt
     * completely, but do we really just want to switch the interrupt to
     * bouncing through userspace and let msix.c drop it?  Not sure.
     */
    msix_vector_unuse(pdev, nr);
    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);

    /* Undo whichever delivery path was wired: userspace fd or KVM irqfd */
    if (vector->virq < 0) {
        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            NULL, NULL, NULL);
    } else {
        kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
                                          vector->virq);
        kvm_irqchip_release_virq(kvm_state, vector->virq);
        vector->virq = -1;
    }

    event_notifier_cleanup(&vector->interrupt);
    vector->use = false;
}
399

    
400
/* TODO This should move to msi.c */
401
static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector)
{
    uint8_t *cap = pdev->config + pdev->msi_cap;
    uint16_t flags = pci_get_word(cap + PCI_MSI_FLAGS);
    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
    MSIMessage msg;

    /* 64-bit capable MSI stores a quad-word address, otherwise a long */
    msg.address = msi64bit ? pci_get_quad(cap + PCI_MSI_ADDRESS_LO)
                           : pci_get_long(cap + PCI_MSI_ADDRESS_LO);

    /* The data register offset depends on the address width */
    msg.data = pci_get_word(cap + (msi64bit ? PCI_MSI_DATA_64
                                            : PCI_MSI_DATA_32));
    msg.data += vector;

    return msg;
}
421

    
422
/* So should this */
423
static void msi_set_qsize(PCIDevice *pdev, uint8_t size)
{
    /*
     * Update the Multiple Message Enable field (MSI control bits 6:4,
     * log2 of the enabled vector count) in emulated config space.
     *
     * pci_get_word()/pci_set_word() already convert to/from config
     * space little endian, so the previous explicit le16_to_cpu()/
     * cpu_to_le16() pair double-swapped the flags on big-endian hosts.
     */
    uint8_t *config = pdev->config + pdev->msi_cap;
    uint16_t flags;

    flags = pci_get_word(config + PCI_MSI_FLAGS);
    flags &= ~PCI_MSI_FLAGS_QSIZE;
    flags |= (size & 0x7) << 4;
    pci_set_word(config + PCI_MSI_FLAGS, flags);
}
435

    
436
/*
 * Enable MSI for the device: allocate per-vector eventfds, route each
 * through the KVM irqchip when possible, and program them all into the
 * kernel.  If the kernel can only support fewer vectors, retry with the
 * count it reports.
 */
static void vfio_enable_msi(VFIODevice *vdev)
{
    int ret, i;

    vfio_disable_interrupts(vdev);

    vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
retry:
    vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector));

    for (i = 0; i < vdev->nr_vectors; i++) {
        MSIMessage msg;
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        vector->vdev = vdev;
        vector->use = true;

        if (event_notifier_init(&vector->interrupt, 0)) {
            error_report("vfio: Error: event_notifier_init failed\n");
        }

        msg = msi_get_msg(&vdev->pdev, i);

        /*
         * Attempt to enable route through KVM irqchip,
         * default to userspace handling if unavailable.
         */
        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
        if (vector->virq < 0 ||
            kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                           vector->virq) < 0) {
            /*
             * irqfd attach failed: release the route and reset virq so
             * the cleanup loop below doesn't try to remove an irqfd that
             * was never added (matches vfio_msix_vector_use()).
             */
            if (vector->virq >= 0) {
                kvm_irqchip_release_virq(kvm_state, vector->virq);
                vector->virq = -1;
            }
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                vfio_msi_interrupt, NULL, vector);
        }
    }

    ret = vfio_enable_vectors(vdev, false);
    if (ret) {
        if (ret < 0) {
            error_report("vfio: Error: Failed to setup MSI fds: %m\n");
        } else if (ret != vdev->nr_vectors) {
            error_report("vfio: Error: Failed to enable %d "
                         "MSI vectors, retry with %d\n", vdev->nr_vectors, ret);
        }

        /* Tear down everything we set up above */
        for (i = 0; i < vdev->nr_vectors; i++) {
            VFIOMSIVector *vector = &vdev->msi_vectors[i];
            if (vector->virq >= 0) {
                kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
                                                  vector->virq);
                kvm_irqchip_release_virq(kvm_state, vector->virq);
                vector->virq = -1;
            } else {
                qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                    NULL, NULL, NULL);
            }
            event_notifier_cleanup(&vector->interrupt);
        }

        g_free(vdev->msi_vectors);
        vdev->msi_vectors = NULL; /* don't leave a dangling pointer behind */

        /* A positive short count tells us how many vectors to retry with */
        if (ret > 0 && ret != vdev->nr_vectors) {
            vdev->nr_vectors = ret;
            goto retry;
        }
        vdev->nr_vectors = 0;

        return;
    }

    msi_set_qsize(&vdev->pdev, vdev->nr_vectors);

    DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, vdev->nr_vectors);
}
512

    
513
/*
 * Disable MSI (@msix = false) or MSI-X (@msix = true): tear down the
 * kernel triggers, dismantle each vector's delivery path, free the
 * vector array, and fall back to INTx.
 */
static void vfio_disable_msi_x(VFIODevice *vdev, bool msix)
{
    int i;

    /* One call tears down every trigger for the active index */
    vfio_disable_irqindex(vdev, msix ? VFIO_PCI_MSIX_IRQ_INDEX :
                                       VFIO_PCI_MSI_IRQ_INDEX);

    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        if (!vector->use) {
            continue;
        }

        /* Undo whichever delivery path was wired: KVM irqfd or userspace fd */
        if (vector->virq >= 0) {
            kvm_irqchip_remove_irqfd_notifier(kvm_state,
                                              &vector->interrupt, vector->virq);
            kvm_irqchip_release_virq(kvm_state, vector->virq);
            vector->virq = -1;
        } else {
            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                                NULL, NULL, NULL);
        }

        if (msix) {
            msix_vector_unuse(&vdev->pdev, i);
        }

        event_notifier_cleanup(&vector->interrupt);
    }

    g_free(vdev->msi_vectors);
    vdev->msi_vectors = NULL;
    vdev->nr_vectors = 0;

    if (!msix) {
        msi_set_qsize(&vdev->pdev, 0); /* Actually still means 1 vector */
    }

    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, msix ? "x" : "");

    /* Restore INTx if the device supports it */
    vfio_enable_intx(vdev);
}
558

    
559
/*
560
 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
561
 */
562
static void vfio_bar_write(void *opaque, target_phys_addr_t addr,
                           uint64_t data, unsigned size)
{
    VFIOBAR *bar = opaque;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    /*
     * Convert to device (little) endianness before writing through the
     * region fd.  NOTE(review): the union has a qword member but there
     * is no 8-byte case, so 8-byte accesses hit hw_error() -- confirm
     * the memory core never issues them for these regions.
     */
    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %d bytes\n", size);
        break;
    }

    if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
        error_report("%s(,0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d) failed: %m\n",
                     __func__, addr, data, size);
    }

    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d)\n",
            __func__, bar->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
}
606

    
607
static uint64_t vfio_bar_read(void *opaque,
                              target_phys_addr_t addr, unsigned size)
{
    VFIOBAR *bar = opaque;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    /* Read raw little-endian bytes from the region fd */
    if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
        error_report("%s(,0x%"TARGET_PRIxPHYS", %d) failed: %m\n",
                     __func__, addr, size);
        return (uint64_t)-1; /* all-ones, the PCI "no response" value */
    }

    /* Convert to host endianness; 8-byte reads are not supported here */
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    default:
        hw_error("vfio: unsupported read size, %d bytes\n", size);
        break;
    }

    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", %d) = 0x%"PRIx64"\n",
            __func__, bar->nr, addr, size, data);

    /* Same as write above */
    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));

    return data;
}
648

    
649
/* Slow-path BAR access ops, used whenever mmap is disabled or unavailable */
static const MemoryRegionOps vfio_bar_ops = {
    .read = vfio_bar_read,
    .write = vfio_bar_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
654

    
655
/*
656
 * PCI config space
657
 */
658
static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    uint32_t val = 0;

    /*
     * We only need QEMU PCI config support for the ROM BAR, the MSI and MSIX
     * capabilities, and the multifunction bit below.  We let VFIO handle
     * virtualizing everything else.  Performance is not a concern here.
     */
    if (ranges_overlap(addr, len, PCI_ROM_ADDRESS, 4) ||
        (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
         ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) ||
        (pdev->cap_present & QEMU_PCI_CAP_MSI &&
         ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size))) {

        /* Emulated ranges come from QEMU's config space copy */
        val = pci_default_read_config(pdev, addr, len);
    } else {
        /* Everything else comes straight from the kernel's view */
        if (pread(vdev->fd, &val, len, vdev->config_offset + addr) != len) {
            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m\n",
                         __func__, vdev->host.domain, vdev->host.bus,
                         vdev->host.slot, vdev->host.function, addr, len);
            return -errno;
        }
        val = le32_to_cpu(val);
    }

    /* Multifunction bit is virtualized in QEMU */
    if (unlikely(ranges_overlap(addr, len, PCI_HEADER_TYPE, 1))) {
        uint32_t mask = PCI_HEADER_TYPE_MULTI_FUNCTION;

        /* Shift the mask when the header-type byte isn't the low byte read */
        if (len == 4) {
            mask <<= 16;
        }

        if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
            val |= mask;
        } else {
            val &= ~mask;
        }
    }

    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, addr, len, val);

    return val;
}
706

    
707
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
                                  uint32_t val, int len)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    uint32_t val_le = cpu_to_le32(val); /* config space is little endian */

    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, addr, val, len);

    /* Write everything to VFIO, let it filter out what we can't write */
    if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) {
        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m\n",
                     __func__, vdev->host.domain, vdev->host.bus,
                     vdev->host.slot, vdev->host.function, addr, val, len);
    }

    /* Write standard header bits to emulation */
    if (addr < PCI_CONFIG_HEADER_SIZE) {
        pci_default_write_config(pdev, addr, val, len);
        return;
    }

    /* MSI/MSI-X Enabling/Disabling */
    if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
        ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
        int is_enabled, was_enabled = msi_enabled(pdev);

        pci_default_write_config(pdev, addr, val, len);

        is_enabled = msi_enabled(pdev);

        /* React only to enable-state transitions */
        if (!was_enabled && is_enabled) {
            vfio_enable_msi(vdev);
        } else if (was_enabled && !is_enabled) {
            vfio_disable_msi_x(vdev, false);
        }
    }

    if (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
        ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) {
        int is_enabled, was_enabled = msix_enabled(pdev);

        pci_default_write_config(pdev, addr, val, len);

        is_enabled = msix_enabled(pdev);

        if (!was_enabled && is_enabled) {
            /* vfio_msix_vector_use handles this automatically */
        } else if (was_enabled && !is_enabled) {
            vfio_disable_msi_x(vdev, true);
        }
    }
}
761

    
762
/*
763
 * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
764
 */
765
static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
766
                        ram_addr_t size, void *vaddr, bool readonly)
767
{
768
    struct vfio_iommu_type1_dma_map map = {
769
        .argsz = sizeof(map),
770
        .flags = VFIO_DMA_MAP_FLAG_READ,
771
        .vaddr = (__u64)(intptr_t)vaddr,
772
        .iova = iova,
773
        .size = size,
774
    };
775

    
776
    if (!readonly) {
777
        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
778
    }
779

    
780
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
781
        DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
782
        return -errno;
783
    }
784

    
785
    return 0;
786
}
787

    
788
static int vfio_dma_unmap(VFIOContainer *container,
789
                          target_phys_addr_t iova, ram_addr_t size)
790
{
791
    struct vfio_iommu_type1_dma_unmap unmap = {
792
        .argsz = sizeof(unmap),
793
        .flags = 0,
794
        .iova = iova,
795
        .size = size,
796
    };
797

    
798
    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
799
        DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
800
        return -errno;
801
    }
802

    
803
    return 0;
804
}
805

    
806
/* No-op MemoryListener callback taking only the listener argument. */
static void vfio_listener_dummy1(MemoryListener *listener)
{
    /* We don't do batching (begin/commit) or care about logging */
}
810

    
811
/* No-op MemoryListener callback taking a region section argument. */
static void vfio_listener_dummy2(MemoryListener *listener,
                                 MemoryRegionSection *section)
{
    /* We don't do logging or care about nops */
}
816

    
817
/* No-op MemoryListener callback for eventfd add/del notifications. */
static void vfio_listener_dummy3(MemoryListener *listener,
                                 MemoryRegionSection *section,
                                 bool match_data, uint64_t data,
                                 EventNotifier *e)
{
    /* We don't care about eventfds */
}
824

    
825
/* Only RAM-backed regions can be mapped through the IOMMU. */
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
    if (memory_region_is_ram(section->mr)) {
        return false;
    }
    return true;
}
829

    
830
/*
 * MemoryListener region_add hook: establish an IOMMU mapping for any
 * new RAM-backed guest-physical range, page-aligned at both ends.
 */
static void vfio_listener_region_add(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            iommu_data.listener);
    target_phys_addr_t iova, end;
    void *vaddr;
    int ret;

    if (vfio_listener_skipped_section(section)) {
        DPRINTF("vfio: SKIPPING region_add %"TARGET_PRIxPHYS" - %"PRIx64"\n",
                section->offset_within_address_space,
                section->offset_within_address_space + section->size - 1);
        return;
    }

    /* Address-space and region offsets must share the same sub-page bits */
    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region\n", __func__);
        return;
    }

    /* Round start up and end down to page boundaries */
    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    end = (section->offset_within_address_space + section->size) &
          TARGET_PAGE_MASK;

    /* Nothing left to map after alignment */
    if (iova >= end) {
        return;
    }

    /* Host virtual address corresponding to the aligned iova start */
    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    DPRINTF("vfio: region_add %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS" [%p]\n",
            iova, end - 1, vaddr);

    ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
    if (ret) {
        error_report("vfio_dma_map(%p, 0x%"TARGET_PRIxPHYS", "
                     "0x%"TARGET_PRIxPHYS", %p) = %d (%m)\n",
                     container, iova, end - iova, vaddr, ret);
    }
}
874

    
875
/*
 * MemoryListener region_del hook: remove the IOMMU mapping for a
 * departing RAM-backed range, using the same alignment rules as
 * vfio_listener_region_add() so the unmap matches the original map.
 */
static void vfio_listener_region_del(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            iommu_data.listener);
    target_phys_addr_t iova, end;
    int ret;

    if (vfio_listener_skipped_section(section)) {
        DPRINTF("vfio: SKIPPING region_del %"TARGET_PRIxPHYS" - %"PRIx64"\n",
                section->offset_within_address_space,
                section->offset_within_address_space + section->size - 1);
        return;
    }

    /* Must mirror the alignment check performed when the region was added */
    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region\n", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    end = (section->offset_within_address_space + section->size) &
          TARGET_PAGE_MASK;

    if (iova >= end) {
        return;
    }

    DPRINTF("vfio: region_del %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS"\n",
            iova, end - 1);

    ret = vfio_dma_unmap(container, iova, end - iova);
    if (ret) {
        error_report("vfio_dma_unmap(%p, 0x%"TARGET_PRIxPHYS", "
                     "0x%"TARGET_PRIxPHYS") = %d (%m)\n",
                     container, iova, end - iova, ret);
    }
}
914

    
915
static MemoryListener vfio_memory_listener = {
916
    .begin = vfio_listener_dummy1,
917
    .commit = vfio_listener_dummy1,
918
    .region_add = vfio_listener_region_add,
919
    .region_del = vfio_listener_region_del,
920
    .region_nop = vfio_listener_dummy2,
921
    .log_start = vfio_listener_dummy2,
922
    .log_stop = vfio_listener_dummy2,
923
    .log_sync = vfio_listener_dummy2,
924
    .log_global_start = vfio_listener_dummy1,
925
    .log_global_stop = vfio_listener_dummy1,
926
    .eventfd_add = vfio_listener_dummy3,
927
    .eventfd_del = vfio_listener_dummy3,
928
};
929

    
930
/* Stop tracking address space changes for this container. */
static void vfio_listener_release(VFIOContainer *container)
{
    memory_listener_unregister(&container->iommu_data.listener);
}
934

    
935
/*
936
 * Interrupt setup
937
 */
938
static void vfio_disable_interrupts(VFIODevice *vdev)
939
{
940
    switch (vdev->interrupt) {
941
    case VFIO_INT_INTx:
942
        vfio_disable_intx(vdev);
943
        break;
944
    case VFIO_INT_MSI:
945
        vfio_disable_msi_x(vdev, false);
946
        break;
947
    case VFIO_INT_MSIX:
948
        vfio_disable_msi_x(vdev, true);
949
        break;
950
    }
951
}
952

    
953
/*
 * Probe the physical device's MSI capability at config offset @pos and
 * register an emulated MSI capability of matching geometry with QEMU.
 * Returns 0 on success (or when MSI is unsupported), -errno on read
 * failure, or the msi_init() error.
 */
static int vfio_setup_msi(VFIODevice *vdev, int pos)
{
    uint16_t ctrl;
    bool msi_64bit, msi_maskbit;
    int ret, entries;

    /*
     * TODO: don't peek into msi_supported, let msi_init fail and
     * check for ENOTSUP
     */
    if (!msi_supported) {
        return 0;
    }

    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
        return -errno;
    }
    ctrl = le16_to_cpu(ctrl);

    msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
    msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
    /* Multiple Message Capable field encodes log2 of the vector count */
    entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);

    DPRINTF("%04x:%02x:%02x.%x PCI MSI CAP @0x%x\n", vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function, pos);

    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
    if (ret < 0) {
        error_report("vfio: msi_init failed\n");
        return ret;
    }
    /* Capability length varies with the 64bit and per-vector mask options */
    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);

    return 0;
}
989

    
990
/*
991
 * We don't have any control over how pci_add_capability() inserts
992
 * capabilities into the chain.  In order to setup MSI-X we need a
993
 * MemoryRegion for the BAR.  In order to setup the BAR and not
994
 * attempt to mmap the MSI-X table area, which VFIO won't allow, we
995
 * need to first look for where the MSI-X table lives.  So we
996
 * unfortunately split MSI-X setup across two functions.
997
 */
998
static int vfio_early_setup_msix(VFIODevice *vdev)
999
{
1000
    uint8_t pos;
1001
    uint16_t ctrl;
1002
    uint32_t table, pba;
1003

    
1004
    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
1005
    if (!pos) {
1006
        return 0;
1007
    }
1008

    
1009
    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
1010
              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
1011
        return -errno;
1012
    }
1013

    
1014
    if (pread(vdev->fd, &table, sizeof(table),
1015
              vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
1016
        return -errno;
1017
    }
1018

    
1019
    if (pread(vdev->fd, &pba, sizeof(pba),
1020
              vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
1021
        return -errno;
1022
    }
1023

    
1024
    ctrl = le16_to_cpu(ctrl);
1025
    table = le32_to_cpu(table);
1026
    pba = le32_to_cpu(pba);
1027

    
1028
    vdev->msix = g_malloc0(sizeof(*(vdev->msix)));
1029
    vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
1030
    vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
1031
    vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
1032
    vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
1033
    vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
1034

    
1035
    DPRINTF("%04x:%02x:%02x.%x "
1036
            "PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d\n",
1037
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
1038
            vdev->host.function, pos, vdev->msix->table_bar,
1039
            vdev->msix->table_offset, vdev->msix->entries);
1040

    
1041
    return 0;
1042
}
1043

    
1044
/*
 * Second half of MSI-X setup (see vfio_early_setup_msix): register the
 * emulated capability with QEMU using the cached table/PBA geometry and
 * hook the per-vector use/release notifiers.
 */
static int vfio_setup_msix(VFIODevice *vdev, int pos)
{
    int ret;

    /*
     * TODO: don't peek into msi_supported, let msix_init fail and
     * check for ENOTSUP
     */
    if (!msi_supported) {
        return 0;
    }

    ret = msix_init(&vdev->pdev, vdev->msix->entries,
                    &vdev->bars[vdev->msix->table_bar].mem,
                    vdev->msix->table_bar, vdev->msix->table_offset,
                    &vdev->bars[vdev->msix->pba_bar].mem,
                    vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
    if (ret < 0) {
        error_report("vfio: msix_init failed\n");
        return ret;
    }

    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
                                    vfio_msix_vector_release);
    if (ret) {
        error_report("vfio: msix_set_vector_notifiers failed %d\n", ret);
        /* Roll back msix_init on notifier failure */
        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
                    &vdev->bars[vdev->msix->pba_bar].mem);
        return ret;
    }

    return 0;
}
1077

    
1078
/* Undo both vfio_setup_msi and vfio_setup_msix, in either order of setup. */
static void vfio_teardown_msi(VFIODevice *vdev)
{
    msi_uninit(&vdev->pdev);

    if (vdev->msix) {
        /* FIXME: Why can't unset just silently do nothing?? */
        if (vdev->pdev.msix_vector_use_notifier &&
            vdev->pdev.msix_vector_release_notifier) {
            msix_unset_vector_notifiers(&vdev->pdev);
        }

        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
                    &vdev->bars[vdev->msix->pba_bar].mem);
    }
}
1093

    
1094
/*
1095
 * Resource setup
1096
 */
1097
static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled)
1098
{
1099
    int i;
1100

    
1101
    for (i = 0; i < PCI_ROM_SLOT; i++) {
1102
        VFIOBAR *bar = &vdev->bars[i];
1103

    
1104
        if (!bar->size) {
1105
            continue;
1106
        }
1107

    
1108
        memory_region_set_enabled(&bar->mmap_mem, enabled);
1109
        if (vdev->msix && vdev->msix->table_bar == i) {
1110
            memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
1111
        }
1112
    }
1113
}
1114

    
1115
/* Tear down one BAR: remove and munmap its mmap subregions, destroy it. */
static void vfio_unmap_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];

    if (!bar->size) {
        return; /* nothing was mapped for this BAR */
    }

    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));

    /* The MSI-X table BAR has an extra mapping above the vector table */
    if (vdev->msix && vdev->msix->table_bar == nr) {
        memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
    }

    memory_region_destroy(&bar->mem);
}
1133

    
1134
/*
 * Try to mmap @size bytes of @bar at @offset and insert the result into
 * @mem as subregion @submem.  On failure (or when mmap is not permitted
 * for this region) a zero-sized placeholder subregion is inserted instead
 * so cleanup can treat both cases uniformly.  Returns 0 or -errno.
 */
static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
                         void **map, size_t size, off_t offset,
                         const char *name)
{
    int ret = 0;

    if (size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        int prot = 0;

        if (bar->flags & VFIO_REGION_INFO_FLAG_READ) {
            prot |= PROT_READ;
        }

        if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) {
            prot |= PROT_WRITE;
        }

        *map = mmap(NULL, size, prot, MAP_SHARED,
                    bar->fd, bar->fd_offset + offset);
        if (*map == MAP_FAILED) {
            *map = NULL;
            ret = -errno;
            goto empty_region;
        }

        memory_region_init_ram_ptr(submem, name, size, *map);
    } else {
empty_region:
        /* Create a zero sized sub-region to make cleanup easy. */
        memory_region_init(submem, name, 0);
    }

    memory_region_add_subregion(mem, offset, submem);

    return ret;
}
1170

    
1171
/*
 * Register BAR @nr with QEMU: a "slow" read/write MemoryRegion always
 * underlies the BAR, with direct-mapped (mmap) subregions layered on top
 * where VFIO allows it.  The MSI-X vector table may never be mmap'd, so
 * for the table's BAR we map the ranges below and above the table only.
 */
static void vfio_map_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];
    unsigned size = bar->size;
    char name[64];
    uint32_t pci_bar;
    uint8_t type;
    int ret;

    /* Skip both unimplemented BARs and the upper half of 64bit BARS. */
    if (!size) {
        return;
    }

    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function, nr);

    /* Determine what type of BAR this is for registration */
    ret = pread(vdev->fd, &pci_bar, sizeof(pci_bar),
                vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
    if (ret != sizeof(pci_bar)) {
        error_report("vfio: Failed to read BAR %d (%m)\n", nr);
        return;
    }

    /* Keep only the type/flag bits appropriate to I/O vs memory BARs */
    pci_bar = le32_to_cpu(pci_bar);
    type = pci_bar & (pci_bar & PCI_BASE_ADDRESS_SPACE_IO ?
           ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK);

    /* A "slow" read/write mapping underlies all BARs */
    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);

    /*
     * We can't mmap areas overlapping the MSIX vector table, so we
     * potentially insert a direct-mapped subregion before and after it.
     */
    if (vdev->msix && vdev->msix->table_bar == nr) {
        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
    }

    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
    if (vfio_mmap_bar(bar, &bar->mem,
                      &bar->mmap_mem, &bar->mmap, size, 0, name)) {
        error_report("%s unsupported. Performance may be slow\n", name);
    }

    if (vdev->msix && vdev->msix->table_bar == nr) {
        unsigned start;

        /* First page boundary past the end of the vector table */
        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));

        size = start < bar->size ? bar->size - start : 0;
        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
        if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
                          &vdev->msix->mmap, size, start, name)) {
            error_report("%s unsupported. Performance may be slow\n", name);
        }
    }
}
1234

    
1235
/* Map all standard (non-ROM) BARs. */
static void vfio_map_bars(VFIODevice *vdev)
{
    int i;

    for (i = 0; i < PCI_ROM_SLOT; i++) {
        vfio_map_bar(vdev, i);
    }
}
1243

    
1244
/* Unmap all standard (non-ROM) BARs. */
static void vfio_unmap_bars(VFIODevice *vdev)
{
    int i;

    for (i = 0; i < PCI_ROM_SLOT; i++) {
        vfio_unmap_bar(vdev, i);
    }
}
1252

    
1253
/*
1254
 * General setup
1255
 */
1256
static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
1257
{
1258
    uint8_t tmp, next = 0xff;
1259

    
1260
    for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
1261
         tmp = pdev->config[tmp + 1]) {
1262
        if (tmp > pos && tmp < next) {
1263
            next = tmp;
1264
        }
1265
    }
1266

    
1267
    return next - pos;
1268
}
1269

    
1270
/*
 * Recursively mirror the physical device's standard capability chain into
 * the emulated config space, starting at @pos.  Recursion from the tail
 * preserves the physical chain order (pci_add_capability always inserts
 * at the head).  Returns 0 or a negative error.
 */
static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
{
    PCIDevice *pdev = &vdev->pdev;
    uint8_t cap_id, next, size;
    int ret;

    cap_id = pdev->config[pos];
    next = pdev->config[pos + 1];

    /*
     * If it becomes important to configure capabilities to their actual
     * size, use this as the default when it's something we don't recognize.
     * Since QEMU doesn't actually handle many of the config accesses,
     * exact size doesn't seem worthwhile.
     */
    size = vfio_std_cap_max_size(pdev, pos);

    /*
     * pci_add_capability always inserts the new capability at the head
     * of the chain.  Therefore to end up with a chain that matches the
     * physical device, we insert from the end by making this recursive.
     * This is also why we pre-caclulate size above as cached config space
     * will be changed as we unwind the stack.
     */
    if (next) {
        ret = vfio_add_std_cap(vdev, next);
        if (ret) {
            return ret;
        }
    } else {
        pdev->config[PCI_CAPABILITY_LIST] = 0; /* Begin the rebuild */
    }

    switch (cap_id) {
    case PCI_CAP_ID_MSI:
        ret = vfio_setup_msi(vdev, pos);
        break;
    case PCI_CAP_ID_MSIX:
        ret = vfio_setup_msix(vdev, pos);
        break;
    default:
        ret = pci_add_capability(pdev, cap_id, pos, size);
        break;
    }

    if (ret < 0) {
        error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
                     "0x%x[0x%x]@0x%x: %d\n", vdev->host.domain,
                     vdev->host.bus, vdev->host.slot, vdev->host.function,
                     cap_id, size, pos, ret);
        return ret;
    }

    return 0;
}
1325

    
1326
/* Mirror the device's capability chain, if it advertises one. */
static int vfio_add_capabilities(VFIODevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;

    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
        !pdev->config[PCI_CAPABILITY_LIST]) {
        return 0; /* Nothing to add */
    }

    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
}
1337

    
1338
/*
 * Copy the device's expansion ROM through the VFIO ROM region into a RAM
 * MemoryRegion and register it as the ROM BAR.  Returns 0 (including when
 * the ROM is handled elsewhere or absent) or -errno on a read error.
 */
static int vfio_load_rom(VFIODevice *vdev)
{
    uint64_t size = vdev->rom_size;
    char name[32];
    off_t off = 0, voff = vdev->rom_offset;
    ssize_t bytes;
    void *ptr;

    /* If loading ROM from file, pci handles it */
    if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
        return 0;
    }

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);

    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function);
    memory_region_init_ram(&vdev->pdev.rom, name, size);
    ptr = memory_region_get_ram_ptr(&vdev->pdev.rom);
    memset(ptr, 0xff, size); /* unread tail reads as all-ones */

    while (size) {
        bytes = pread(vdev->fd, ptr + off, size, voff + off);
        if (bytes == 0) {
            break; /* expect that we could get back less than the ROM BAR */
        } else if (bytes > 0) {
            off += bytes;
            size -= bytes;
        } else {
            if (errno == EINTR || errno == EAGAIN) {
                continue; /* transient, retry the read */
            }
            error_report("vfio: Error reading device ROM: %m\n");
            memory_region_destroy(&vdev->pdev.rom);
            return -errno;
        }
    }

    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
    vdev->pdev.has_rom = true;
    return 0;
}
1382

    
1383
/*
 * Attach @group to a VFIO container: reuse an existing compatible
 * container if the kernel accepts the group into it, otherwise open a new
 * /dev/vfio/vfio container, verify the API version, enable the Type1
 * IOMMU backend, and start mirroring guest memory via the MemoryListener.
 * Returns 0 or a negative errno-style error.
 */
static int vfio_connect_container(VFIOGroup *group)
{
    VFIOContainer *container;
    int ret, fd;

    if (group->container) {
        return 0; /* already connected */
    }

    /* Try to join an existing container first */
    QLIST_FOREACH(container, &container_list, next) {
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            group->container = container;
            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
            return 0;
        }
    }

    fd = qemu_open("/dev/vfio/vfio", O_RDWR);
    if (fd < 0) {
        error_report("vfio: failed to open /dev/vfio/vfio: %m\n");
        return -errno;
    }

    ret = ioctl(fd, VFIO_GET_API_VERSION);
    if (ret != VFIO_API_VERSION) {
        error_report("vfio: supported vfio version: %d, "
                     "reported version: %d\n", VFIO_API_VERSION, ret);
        close(fd);
        return -EINVAL;
    }

    container = g_malloc0(sizeof(*container));
    container->fd = fd;

    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
        if (ret) {
            error_report("vfio: failed to set group container: %m\n");
            g_free(container);
            close(fd);
            return -errno;
        }

        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
        if (ret) {
            error_report("vfio: failed to set iommu for container: %m\n");
            g_free(container);
            close(fd);
            return -errno;
        }

        container->iommu_data.listener = vfio_memory_listener;
        container->iommu_data.release = vfio_listener_release;

        /* Replay existing memory topology into the new IOMMU domain */
        memory_listener_register(&container->iommu_data.listener,
                                 get_system_memory());
    } else {
        error_report("vfio: No available IOMMU models\n");
        g_free(container);
        close(fd);
        return -EINVAL;
    }

    QLIST_INIT(&container->group_list);
    QLIST_INSERT_HEAD(&container_list, container, next);

    group->container = container;
    QLIST_INSERT_HEAD(&container->group_list, group, container_next);

    return 0;
}
1454

    
1455
/*
 * Detach @group from its container; when the last group leaves, release
 * the memory listener, close the container fd and free the container.
 */
static void vfio_disconnect_container(VFIOGroup *group)
{
    VFIOContainer *container = group->container;

    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
        error_report("vfio: error disconnecting group %d from container\n",
                     group->groupid);
    }

    QLIST_REMOVE(group, container_next);
    group->container = NULL;

    if (QLIST_EMPTY(&container->group_list)) {
        if (container->iommu_data.release) {
            container->iommu_data.release(container);
        }
        QLIST_REMOVE(container, next);
        DPRINTF("vfio_disconnect_container: close container->fd\n");
        close(container->fd);
        g_free(container);
    }
}
1477

    
1478
static VFIOGroup *vfio_get_group(int groupid)
1479
{
1480
    VFIOGroup *group;
1481
    char path[32];
1482
    struct vfio_group_status status = { .argsz = sizeof(status) };
1483

    
1484
    QLIST_FOREACH(group, &group_list, next) {
1485
        if (group->groupid == groupid) {
1486
            return group;
1487
        }
1488
    }
1489

    
1490
    group = g_malloc0(sizeof(*group));
1491

    
1492
    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
1493
    group->fd = qemu_open(path, O_RDWR);
1494
    if (group->fd < 0) {
1495
        error_report("vfio: error opening %s: %m\n", path);
1496
        g_free(group);
1497
        return NULL;
1498
    }
1499

    
1500
    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
1501
        error_report("vfio: error getting group status: %m\n");
1502
        close(group->fd);
1503
        g_free(group);
1504
        return NULL;
1505
    }
1506

    
1507
    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
1508
        error_report("vfio: error, group %d is not viable, please ensure "
1509
                     "all devices within the iommu_group are bound to their "
1510
                     "vfio bus driver.\n", groupid);
1511
        close(group->fd);
1512
        g_free(group);
1513
        return NULL;
1514
    }
1515

    
1516
    group->groupid = groupid;
1517
    QLIST_INIT(&group->device_list);
1518

    
1519
    if (vfio_connect_container(group)) {
1520
        error_report("vfio: failed to setup container for group %d\n", groupid);
1521
        close(group->fd);
1522
        g_free(group);
1523
        return NULL;
1524
    }
1525

    
1526
    QLIST_INSERT_HEAD(&group_list, group, next);
1527

    
1528
    return group;
1529
}
1530

    
1531
/* Release a group reference; frees the group once no devices remain. */
static void vfio_put_group(VFIOGroup *group)
{
    if (!QLIST_EMPTY(&group->device_list)) {
        return; /* still in use by other devices */
    }

    vfio_disconnect_container(group);
    QLIST_REMOVE(group, next);
    DPRINTF("vfio_put_group: close group->fd\n");
    close(group->fd);
    g_free(group);
}
1543

    
1544
/*
 * Obtain a device fd for @name from @group, sanity-check it as a PCI
 * device, and cache the BAR/ROM/config region geometry in @vdev.
 * On failure the device is unlinked from the group and its fd closed.
 */
static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
{
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
    int ret, i;

    ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
    if (ret < 0) {
        error_report("vfio: error getting device %s from group %d: %m\n",
                     name, group->groupid);
        error_report("Verify all devices in group %d are bound to vfio-pci "
                     "or pci-stub and not already in use\n", group->groupid);
        return ret;
    }

    vdev->fd = ret;
    vdev->group = group;
    QLIST_INSERT_HEAD(&group->device_list, vdev, next);

    /* Sanity check device */
    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_report("vfio: error getting device info: %m\n");
        goto error;
    }

    DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
            dev_info.flags, dev_info.num_regions, dev_info.num_irqs);

    if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
        error_report("vfio: Um, this isn't a PCI device\n");
        goto error;
    }

    vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
    if (!vdev->reset_works) {
        error_report("Warning, device %s does not support reset\n", name);
    }

    if (dev_info.num_regions != VFIO_PCI_NUM_REGIONS) {
        error_report("vfio: unexpected number of io regions %u\n",
                     dev_info.num_regions);
        goto error;
    }

    if (dev_info.num_irqs != VFIO_PCI_NUM_IRQS) {
        error_report("vfio: unexpected number of irqs %u\n", dev_info.num_irqs);
        goto error;
    }

    /* Cache geometry for the six standard BAR regions */
    for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
        reg_info.index = i;

        ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
        if (ret) {
            error_report("vfio: Error getting region %d info: %m\n", i);
            goto error;
        }

        DPRINTF("Device %s region %d:\n", name, i);
        DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
                (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
                (unsigned long)reg_info.flags);

        vdev->bars[i].flags = reg_info.flags;
        vdev->bars[i].size = reg_info.size;
        vdev->bars[i].fd_offset = reg_info.offset;
        vdev->bars[i].fd = vdev->fd;
        vdev->bars[i].nr = i;
    }

    /* Expansion ROM region */
    reg_info.index = VFIO_PCI_ROM_REGION_INDEX;

    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
    if (ret) {
        error_report("vfio: Error getting ROM info: %m\n");
        goto error;
    }

    DPRINTF("Device %s ROM:\n", name);
    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
            (unsigned long)reg_info.flags);

    vdev->rom_size = reg_info.size;
    vdev->rom_offset = reg_info.offset;

    /* Config space region */
    reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;

    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
    if (ret) {
        error_report("vfio: Error getting config info: %m\n");
        goto error;
    }

    DPRINTF("Device %s config:\n", name);
    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
            (unsigned long)reg_info.flags);

    vdev->config_size = reg_info.size;
    vdev->config_offset = reg_info.offset;

error:
    if (ret) {
        QLIST_REMOVE(vdev, next);
        vdev->group = NULL;
        close(vdev->fd);
    }
    return ret;
}
1655

    
1656
/* Detach @vdev from its group, close its fd and free MSI-X state. */
static void vfio_put_device(VFIODevice *vdev)
{
    QLIST_REMOVE(vdev, next);
    vdev->group = NULL;
    DPRINTF("vfio_put_device: close vdev->fd\n");
    close(vdev->fd);
    /* g_free(NULL) is a no-op, so no guard is needed */
    g_free(vdev->msix);
    vdev->msix = NULL;
}
1667

    
1668
/*
 * qdev init: resolve the host device's iommu_group, attach to the VFIO
 * group/container, snapshot config space, and set up ROM, BARs,
 * capabilities and (when present) INTx.  Returns 0 or a negative error.
 */
static int vfio_initfn(PCIDevice *pdev)
{
    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOGroup *group;
    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
    ssize_t len;
    struct stat st;
    int groupid;
    int ret;

    /* Check that the host device exists */
    snprintf(path, sizeof(path),
             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function);
    if (stat(path, &st) < 0) {
        error_report("vfio: error: no such host device: %s\n", path);
        return -errno;
    }

    strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);

    /* The iommu_group symlink names the group, e.g. .../iommu_groups/26 */
    len = readlink(path, iommu_group_path, PATH_MAX);
    if (len <= 0) {
        error_report("vfio: error no iommu_group for device\n");
        return -errno;
    }

    iommu_group_path[len] = 0;
    group_name = basename(iommu_group_path);

    if (sscanf(group_name, "%d", &groupid) != 1) {
        error_report("vfio: error reading %s: %m\n", path);
        return -errno;
    }

    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);

    group = vfio_get_group(groupid);
    if (!group) {
        error_report("vfio: failed to get group %d\n", groupid);
        return -ENOENT;
    }

    snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function);

    /* Reject a second assignment of the same host device */
    QLIST_FOREACH(pvdev, &group->device_list, next) {
        if (pvdev->host.domain == vdev->host.domain &&
            pvdev->host.bus == vdev->host.bus &&
            pvdev->host.slot == vdev->host.slot &&
            pvdev->host.function == vdev->host.function) {

            error_report("vfio: error: device %s is already attached\n", path);
            vfio_put_group(group);
            return -EBUSY;
        }
    }

    ret = vfio_get_device(group, path, vdev);
    if (ret) {
        error_report("vfio: failed to get device %s\n", path);
        vfio_put_group(group);
        return ret;
    }

    /* Get a copy of config space */
    ret = pread(vdev->fd, vdev->pdev.config,
                MIN(pci_config_size(&vdev->pdev), vdev->config_size),
                vdev->config_offset);
    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
        ret = ret < 0 ? -errno : -EFAULT;
        error_report("vfio: Failed to read device config space\n");
        goto out_put;
    }

    /*
     * Clear host resource mapping info.  If we choose not to register a
     * BAR, such as might be the case with the option ROM, we can get
     * confusing, unwritable, residual addresses from the host here.
     */
    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);

    vfio_load_rom(vdev);

    ret = vfio_early_setup_msix(vdev);
    if (ret) {
        goto out_put;
    }

    vfio_map_bars(vdev);

    ret = vfio_add_capabilities(vdev);
    if (ret) {
        goto out_teardown;
    }

    /* Only wire up INTx if the device advertises an interrupt pin */
    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
        if (vdev->intx.intx && strcmp(vdev->intx.intx, "off")) {
            error_report("vfio: Unknown option x-intx=%s, "
                         "valid options: \"off\".\n", vdev->intx.intx);
            ret = -EINVAL;
            goto out_teardown;
        }

        if (vdev->intx.intx && !strcmp(vdev->intx.intx, "off")) {
            vdev->intx.disabled = true;
        }

        ret = vfio_enable_intx(vdev);
        if (ret) {
            goto out_teardown;
        }
    }

    return 0;

out_teardown:
    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
    vfio_teardown_msi(vdev);
    vfio_unmap_bars(vdev);
out_put:
    vfio_put_device(vdev);
    vfio_put_group(group);
    return ret;
}
1797

    
1798
/* qdev exit: undo everything vfio_initfn set up, in reverse order. */
static void vfio_exitfn(PCIDevice *pdev)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOGroup *group = vdev->group;

    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
    vfio_disable_interrupts(vdev);
    vfio_teardown_msi(vdev);
    vfio_unmap_bars(vdev);
    vfio_put_device(vdev);
    vfio_put_group(group);
}
1810

    
1811
/* DeviceState reset hook: ask VFIO to reset the physical device. */
static void vfio_pci_reset(DeviceState *dev)
{
    PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);

    if (!vdev->reset_works) {
        return; /* device did not advertise VFIO_DEVICE_FLAGS_RESET */
    }

    if (ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
        error_report("vfio: Error unable to reset physical device "
                     "(%04x:%02x:%02x.%x): %m\n", vdev->host.domain,
                     vdev->host.bus, vdev->host.slot, vdev->host.function);
    }
}
1826

    
1827
static Property vfio_pci_dev_properties[] = {
1828
    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
1829
    DEFINE_PROP_STRING("x-intx", VFIODevice, intx.intx),
1830
    /*
1831
     * TODO - support passed fds... is this necessary?
1832
     * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
1833
     * DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name),
1834
     */
1835
    DEFINE_PROP_END_OF_LIST(),
1836
};
1837

    
1838

    
1839
/* QOM class init: wire up reset, properties and PCI device hooks. */
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);

    dc->reset = vfio_pci_reset;
    dc->props = vfio_pci_dev_properties;
    pdc->init = vfio_initfn;
    pdc->exit = vfio_exitfn;
    pdc->config_read = vfio_pci_read_config;
    pdc->config_write = vfio_pci_write_config;
}
1851

    
1852
static const TypeInfo vfio_pci_dev_info = {
1853
    .name = "vfio-pci",
1854
    .parent = TYPE_PCI_DEVICE,
1855
    .instance_size = sizeof(VFIODevice),
1856
    .class_init = vfio_pci_dev_class_init,
1857
};
1858

    
1859
static void register_vfio_pci_dev_type(void)
1860
{
1861
    type_register_static(&vfio_pci_dev_info);
1862
}
1863

    
1864
type_init(register_vfio_pci_dev_type)